Source code for xclim.sdba.measures

"""
Measures submodule
==================
SDBA diagnostic tests are made up of properties and measures. Measures compare adjusted simulations to a reference,
through statistical properties or directly.
This framework for the diagnostic tests was inspired by the [VALUE]_ project.

 .. [VALUE] http://www.value-cost.eu/
"""
from typing import Callable
from warnings import warn

import numpy as np
import xarray as xr
from boltons.funcutils import wraps
from sklearn import metrics

from xclim import sdba
from xclim.core.formatting import update_xclim_history
from xclim.core.units import convert_units_to, units2pint


[docs]def check_same_units_and_convert(func) -> Callable:
    """Verify that the simulation and the reference have the same units.
    If not, it converts the simulation to the units of the reference"""

    @wraps(
        func
    )  # in order to keep the docstring of the function where the decorator is applied
    def _check_same_units(*args):
        sim = args[0]
        ref = args[1]
        units_sim = units2pint(sim.units)
        units_ref = units2pint(ref.units)

        if units_sim != units_ref:
            warn(
                f" sim({units_sim}) and ref({units_ref}) don't have the same units."
                f" sim will be converted to {units_ref}."
            )
            sim = convert_units_to(sim, ref)
        out = func(sim, ref, *args[2:])
        return out

    return _check_same_units


[docs]@check_same_units_and_convert
@update_xclim_history
def bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray:
    """Bias.

    The bias is the simulation minus the reference.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (one value for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (one value for each grid-point)

    Returns
    -------
    xr.DataArray,
      Bias between the simulation and the reference

    """
    out = sim - ref
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Bias"
    out.attrs["units"] = sim.attrs["units"]
    return out


[docs]@check_same_units_and_convert
@update_xclim_history
def relative_bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray:
    """Relative Bias.

    The relative bias is the simulation minus reference, divided by the reference.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (one value for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (one value for each grid-point)

    Returns
    -------
    xr.DataArray,
      Relative bias between the simulation and the reference

    """
    out = (sim - ref) / ref
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Relative bias"
    out.attrs["units"] = ""
    return out


[docs]@check_same_units_and_convert
@update_xclim_history
def circular_bias(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray:
    """Circular bias.

    Bias considering circular time series.
    E.g. The bias between doy 365 and doy 1 is 364, but the circular bias is -1.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (one value for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (one value for each grid-point)

    Returns
    -------
    xr.DataArray,
      Circular bias between the simulation and the reference

    """
    out = (sim - ref) % 365
    out = out.where(
        out <= 365 / 2, 365 - out
    )  # when condition false, replace by 2nd arg
    out = out.where(ref >= sim, out * -1)  # when condition false, replace by 2nd arg
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Circular bias"
    return out


[docs]@check_same_units_and_convert
@update_xclim_history
def ratio(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray:
    """Ratio.

    The ratio is the quotient of the simulation over the reference.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (one value for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (one value for each grid-point)

    Returns
    -------
    xr.DataArray,
      Ratio between the simulation and the reference

    """
    out = sim / ref
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Ratio"
    out.attrs["units"] = ""
    return out


[docs]@check_same_units_and_convert
@update_xclim_history
def rmse(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray:
    """Root mean square error.

    The root mean square error on the time dimension between the simulation and the reference.

    Parameters
    ----------
    sim : xr.DataArray
      Data from the simulation (a time-series for each grid-point)
    ref : xr.DataArray
      Data from the reference (observations) (a time-series for each grid-point)

    Returns
    -------
    xr.DataArray,
      Root mean square error between the simulation and the reference

    See also
    --------
    sklearn.metrics.mean_squared_error

    """

    def _nan_sklearn(simulation, reference):
        if np.isnan(simulation[0]):  # sklearn can't handle the NaNs
            return np.nan
        return metrics.mean_squared_error(simulation, reference, squared=False)

    out = xr.apply_ufunc(
        _nan_sklearn,
        sim,
        ref,
        input_core_dims=[["time"], ["time"]],
        vectorize=True,
        dask="parallelized",
    )
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Root mean square"
    return out


[docs]@check_same_units_and_convert
@update_xclim_history
def mae(sim: xr.DataArray, ref: xr.DataArray) -> xr.DataArray:
    """Mean absolute error.

    The mean absolute error on the time dimension between the simulation and the reference.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (a time-series for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (a time-series for each grid-point)

    Returns
    -------
    xr.DataArray,
      Mean absolute error between the simulation and the reference

    See also
    --------
    sklearn.metrics.mean_absolute_error

    """

    def _nan_sklearn(simulation, reference):
        if np.any(np.isnan(simulation)):  # sklearn can't handle the NaNs
            return np.nan
        return metrics.mean_absolute_error(simulation, reference)

    out = xr.apply_ufunc(
        _nan_sklearn,
        sim,
        ref,
        input_core_dims=[["time"], ["time"]],
        vectorize=True,
        dask="parallelized",
    )
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Mean absolute error"
    return out


[docs]@check_same_units_and_convert
@update_xclim_history
def annual_cycle_correlation(sim, ref, window: int = 15):
    """Annual cycle correlation.

    Pearson correlation coefficient between the smooth day-of-year averaged annual cycles of the simulation and
    the reference. In the smooth day-of-year averaged annual cycles, each day-of-year is averaged over all years
    and over a window of days around that day.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (a time-series for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (a time-series for each grid-point)
    window: int
      Size of window around each day of year around which to take the mean.
      E.g. If window=31, Jan 1st is averaged over from December 17th to January 16th.

    Returns
    -------
    xr.DataArray,
      Annual cycle correlation between the simulation and the reference

    """
    # group by day-of-year and window around each doy
    grouper_test = sdba.base.Grouper("time.dayofyear", window=window)
    # for each day, mean over X day window and over all years to create a smooth avg annual cycle
    sim_annual_cycle = grouper_test.apply("mean", sim)
    ref_annual_cycle = grouper_test.apply("mean", ref)
    out = xr.corr(ref_annual_cycle, sim_annual_cycle, dim="dayofyear")
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Correlation of the annual cycle"
    return out