Source code for xclim.sdba.measures

# noqa: D205,D400
"""
Measures Submodule
==================
SDBA diagnostic tests are made up of properties and measures. Measures compare adjusted simulations to a reference,
through statistical properties or directly. This framework for the diagnostic tests was inspired by the
`VALUE <http://www.value-cost.eu/>`_ project.

"""
from __future__ import annotations

import numpy as np
import xarray as xr

from xclim import sdba
from xclim.core.indicator import Indicator, base_registry
from xclim.core.units import convert_units_to, ensure_delta, units2pint
from xclim.core.utils import InputKind


[docs]class StatisticalMeasure(Indicator): """Base indicator class for statistical measures used when validating bias-adjusted outputs. Statistical measures either use input data where the time dimension was reduced, or they combine the reduction with the measure. They usually take two arrays as input: "sim" and "ref", "sim" being measured against "ref". Statistical measures are generally unit-generic. If the inputs have different units, "sim" is converted to match "ref". """ realm = "generic"
[docs] @classmethod def _ensure_correct_parameters(cls, parameters): inputs = {k for k, p in parameters.items() if p.kind == InputKind.VARIABLE} if not inputs.issuperset({"sim", "ref"}): raise ValueError( f"{cls.__name__} requires 'sim' and 'ref' as inputs. Got {inputs}." ) return super()._ensure_correct_parameters(parameters)
[docs] def _preprocess_and_checks(self, das, params): """Perform parent's checks and also check convert units so that sim matches ref.""" das, params = super()._preprocess_and_checks(das, params) # Convert grouping and check if allowed: sim = das["sim"] ref = das["ref"] units_sim = units2pint(sim) units_ref = units2pint(ref) if units_sim != units_ref: das["sim"] = convert_units_to(sim, ref) return das, params
base_registry["StatisticalMeasure"] = StatisticalMeasure
[docs]def _bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """Bias. The bias is the simulation minus the reference. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point) ref : xr.DataArray data from the reference (observations) (one value for each grid-point) Returns ------- xr.DataArray, [same as ref] Absolute bias """ out = sim - ref out.attrs["units"] = ensure_delta(ref.attrs["units"]) return out
bias = StatisticalMeasure(identifier="bias", compute=_bias)
[docs]def _relative_bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """Relative Bias. The relative bias is the simulation minus reference, divided by the reference. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point) ref : xr.DataArray data from the reference (observations) (one value for each grid-point) Returns ------- xr.DataArray, [dimensionless] Relative bias """ out = (sim - ref) / ref return out.assign_attrs(units="")
relative_bias = StatisticalMeasure( identifier="relative_bias", compute=_relative_bias, units="" )
[docs]def _circular_bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """Circular bias. Bias considering circular time series. E.g. The bias between doy 365 and doy 1 is 364, but the circular bias is -1. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point) ref : xr.DataArray data from the reference (observations) (one value for each grid-point) Returns ------- xr.DataArray, [days] Circular bias """ out = (sim - ref) % 365 out = out.where( out <= 365 / 2, 365 - out ) # when condition false, replace by 2nd arg out = out.where(ref >= sim, out * -1) # when condition false, replace by 2nd arg return out.assign_attrs(units="days")
circular_bias = StatisticalMeasure( identifier="circular_bias", compute=_circular_bias, units="days" )
[docs]def _ratio(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """Ratio. The ratio is the quotient of the simulation over the reference. Parameters ---------- sim : xr.DataArray data from the simulation (one value for each grid-point) ref : xr.DataArray data from the reference (observations) (one value for each grid-point) Returns ------- xr.DataArray, [dimensionless] Ratio """ out = sim / ref out.attrs["units"] = "" return out
ratio = StatisticalMeasure(identifier="ratio", compute=_ratio, units="")
[docs]def _rmse(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """Root mean square error. The root mean square error on the time dimension between the simulation and the reference. Parameters ---------- sim : xr.DataArray Data from the simulation (a time-series for each grid-point) ref : xr.DataArray Data from the reference (observations) (a time-series for each grid-point) Returns ------- xr.DataArray, [same as ref] Root mean square error """ def _rmse(sim, ref): return np.sqrt(np.mean((sim - ref) ** 2, axis=-1)) out = xr.apply_ufunc( _rmse, sim, ref, input_core_dims=[["time"], ["time"]], dask="parallelized", ) return out.assign_attrs(units=ensure_delta(ref.units))
rmse = StatisticalMeasure(identifier="rmse", compute=_rmse)
[docs]def _mae(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray: """Mean absolute error. The mean absolute error on the time dimension between the simulation and the reference. Parameters ---------- sim : xr.DataArray data from the simulation (a time-series for each grid-point) ref : xr.DataArray data from the reference (observations) (a time-series for each grid-point) Returns ------- xr.DataArray, [same as ref] Mean absolute error """ def _mae(sim, ref): return np.mean(np.abs(sim - ref), axis=-1) out = xr.apply_ufunc( _mae, sim, ref, input_core_dims=[["time"], ["time"]], dask="parallelized", ) return out.assign_attrs(units=ensure_delta(ref.units))
mae = StatisticalMeasure(identifier="mae", compute=_mae)
[docs]def _annual_cycle_correlation( sim: xr.DataArray, ref: xr.DataArray, window: int = 15 ) -> xr.DataArray: """Annual cycle correlation. Pearson correlation coefficient between the smooth day-of-year averaged annual cycles of the simulation and the reference. In the smooth day-of-year averaged annual cycles, each day-of-year is averaged over all years and over a window of days around that day. Parameters ---------- sim : xr.DataArray data from the simulation (a time-series for each grid-point) ref : xr.DataArray data from the reference (observations) (a time-series for each grid-point) window: int Size of window around each day of year around which to take the mean. E.g. If window=31, Jan 1st is averaged over from December 17th to January 16th. Returns ------- xr.DataArray, [dimensionless] Annual cycle correlation """ # group by day-of-year and window around each doy grouper_test = sdba.base.Grouper("time.dayofyear", window=window) # for each day, mean over X day window and over all years to create a smooth avg annual cycle sim_annual_cycle = grouper_test.apply("mean", sim) ref_annual_cycle = grouper_test.apply("mean", ref) out = xr.corr(ref_annual_cycle, sim_annual_cycle, dim="dayofyear") return out.assign_attrs(units="")
annual_cycle_correlation = StatisticalMeasure( identifier="annual_cycle_correlation", compute=_annual_cycle_correlation )