Source code for xclim.indices._hydrology

"""Hydrological indice definitions."""

from __future__ import annotations

from functools import partial

import numpy as np
import pandas as pd
import xarray
from scipy.stats import circmean, rv_continuous
from xarray import DataArray

from xclim.core._types import DateStr, Quantified
from xclim.core.calendar import get_calendar
from xclim.core.missing import at_least_n_valid
from xclim.core.units import convert_units_to, declare_units, rate2amount, to_agg_units
from xclim.indices.generic import threshold_count
from xclim.indices.stats import standardized_index

from . import generic

try:
    import pymannkendall as mk
except ImportError:
    mk = None

__all__ = [
    "antecedent_precipitation_index",
    "aridity_index",
    "base_flow_index",
    "base_flow_index_seasonal_ratio",
    "days_with_snowpack",
    "flow_index",
    "high_flow_frequency",
    "lag_snowpack_flow_peaks",
    "low_flow_frequency",
    "melt_and_precip_max",
    "rb_flashiness_index",
    "runoff_ratio",
    "sen_slope",
    "snd_max",
    "snd_max_doy",
    "snow_melt_we_max",
    "snw_max",
    "snw_max_doy",
    "standardized_groundwater_index",
    "standardized_streamflow_index",
]



[docs]
@declare_units(q="[discharge]")
def base_flow_index(q: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
    r"""
    Base flow index.

    Return the base flow index, defined as the minimum 7-day average flow divided by the mean flow.

    Parameters
    ----------
    q : xarray.DataArray
        Rate of river discharge.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray, [dimensionless]
        Base flow index.

    Notes
    -----
    Let :math:`\mathbf{q}=q_0, q_1, \ldots, q_n` be the sequence of daily discharge and :math:`\overline{\mathbf{q}}`
    the mean flow over the period. The base flow index is given by:

    .. math::

       \frac{\min(\mathrm{CMA}_7(\mathbf{q}))}{\overline{\mathbf{q}}}

    where :math:`\mathrm{CMA}_7` is the seven days moving average of the daily flow:

    .. math::

       \mathrm{CMA}_7(q_i) = \frac{\sum_{j=i-3}^{i+3} q_j}{7}
    """
    m7 = q.rolling(time=7, center=True).mean(skipna=False).resample(time=freq)
    mq = q.resample(time=freq)

    m7m = m7.min(dim="time")
    out = m7m / mq.mean(dim="time")
    out.attrs["units"] = ""
    return out




[docs]
@declare_units(q="[discharge]")
def rb_flashiness_index(q: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
    r"""
    Richards-Baker flashiness index.

    Measures oscillations in flow relative to total flow, quantifying the frequency and rapidity of short term changes
    in flow, based on :cite:t:`baker_new_2004`.

    Parameters
    ----------
    q : xarray.DataArray
        Rate of river discharge.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray, [dimensionless]
        R-B Index.

    Notes
    -----
    Let :math:`\mathbf{q}=q_0, q_1, \ldots, q_n` be the sequence of daily discharge, the R-B Index is given by:

    .. math::

       \frac{\sum_{i=1}^n |q_i - q_{i-1}|}{\sum_{i=1}^n q_i}

    References
    ----------
    :cite:cts:`baker_new_2004`
    """
    d = np.abs(q.diff(dim="time")).resample(time=freq)
    mq = q.resample(time=freq)
    out = d.sum(dim="time") / mq.sum(dim="time")
    out.attrs["units"] = ""
    return out




[docs]
@declare_units(
    q="[discharge]",
    params="[]",
)
def standardized_streamflow_index(
    q: xarray.DataArray,
    freq: str | None = "MS",
    window: int = 1,
    dist: str | rv_continuous = "genextreme",
    method: str = "ML",
    fitkwargs: dict | None = None,
    cal_start: DateStr | None = None,
    cal_end: DateStr | None = None,
    params: Quantified | None = None,
    **indexer,
) -> xarray.DataArray:
    r"""
    Standardized Streamflow Index (SSI).

    Parameters
    ----------
    q : xarray.DataArray
        Rate of river discharge.
    freq : str, optional
        Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes
        that the desired resampling has already been applied input dataset and will skip the resampling step.
    window : int
        Averaging window length relative to the resampling frequency. For example, if `freq="MS"`,
        i.e. a monthly resampling, the window is an integer number of months.
    dist : {"genextreme", "fisk"} or `rv_continuous` function
        Name of the univariate distribution, or a callable `rv_continuous` (see :py:mod:`scipy.stats`).
    method : {"APP", "ML", "PWM"}
        Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate). The approximate method
        uses a deterministic function that does not involve any optimization.
        `PWM` should be used with a `lmoments3` distribution.
    fitkwargs : dict, optional
        Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certain parameters (`floc`, `fscale`).
    cal_start : DateStr, optional
        Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`.
        Default option `None` means that the calibration period begins at the start of the input dataset.
    cal_end : DateStr, optional
        End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`.
        Default option `None` means that the calibration period finishes at the end of the input dataset.
    params : xarray.DataArray, optional
        Fit parameters.
        The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance.
        The output can be given here as input, and it overrides other options.
    **indexer : Indexer
        Indexing parameters to compute the indicator on a temporal subset of the data.
        It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`.

    Returns
    -------
    xarray.DataArray, [unitless]
        Standardized Streamflow Index.

    See Also
    --------
    xclim.indices._agro.standardized_precipitation_index : Standardized Precipitation Index.
    xclim.indices.stats.standardized_index : Standardized Index.
    xclim.indices.stats.standardized_index_fit_params : Standardized Index Fit Params.

    Notes
    -----
    * N-month SSI / N-day SSI is determined by choosing the `window = N` and the appropriate frequency `freq`.
    * Supported statistical distributions are: ["genextreme", "fisk"], where "fisk" is scipy's implementation of
       a log-logistic distribution.
    * If `params` is provided, it overrides the `cal_start`, `cal_end`, `freq`, `window`, `dist`, and `method` options.
    * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method "APP".
    * The standardized index is bounded by ±8.21. 8.21 is the largest standardized index as constrained by the
      float64 precision in the inversion to the normal distribution.

    References
    ----------
    :cite:cts:`vicente-serrano_2012`

    Examples
    --------
    >>> from datetime import datetime
    >>> from xclim.indices import standardized_streamflow_index
    >>> ds = xr.open_dataset(path_to_q_file)
    >>> q = ds.q_sim
    >>> cal_start, cal_end = "2006-05-01", "2008-06-01"
    >>> ssi_3 = standardized_streamflow_index(
    ...     q,
    ...     freq="MS",
    ...     window=3,
    ...     dist="genextreme",
    ...     method="ML",
    ...     cal_start=cal_start,
    ...     cal_end=cal_end,
    ... )  # Computing SSI-3 months using a GEV distribution for the fit
    >>> # Fitting parameters can also be obtained first, then reused as input.
    >>> from xclim.indices.stats import standardized_index_fit_params
    >>> params = standardized_index_fit_params(
    ...     q.sel(time=slice(cal_start, cal_end)),
    ...     freq="MS",
    ...     window=3,
    ...     dist="genextreme",
    ...     method="ML",
    ... )  # First getting params
    >>> ssi_3 = standardized_streamflow_index(q, params=params)
    """
    fitkwargs = fitkwargs or {}
    dist_methods = {
        # FIXME: xclim-v1 — remove "APP"
        "genextreme": ["ML", "APP"],
        "fisk": ["ML", "APP"],
    }
    if isinstance(dist, str):
        if dist in dist_methods:
            if method not in dist_methods[dist]:
                raise NotImplementedError(f"{method} method is not implemented for {dist} distribution")
        else:
            raise NotImplementedError(f"{dist} distribution is not yet implemented.")

    zero_inflated = False
    ssi = standardized_index(
        q,
        freq=freq,
        window=window,
        dist=dist,
        method=method,
        zero_inflated=zero_inflated,
        fitkwargs=fitkwargs,
        cal_start=cal_start,
        cal_end=cal_end,
        params=params,
        **indexer,
    )

    return ssi




[docs]
@declare_units(snd="[length]")
def snd_max(snd: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray:
    """
    Maximum snow depth.

    The maximum daily snow depth.

    Parameters
    ----------
    snd : xarray.DataArray
        Snow depth (mass per area).
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The maximum snow depth over a given number of days for each period. [length].
    """
    return generic.select_resample_op(snd, op="max", freq=freq)




[docs]
@declare_units(snd="[length]")
def snd_max_doy(snd: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray:
    """
    Day of year of maximum snow depth.

    Day of year when surface snow reaches its peak value. If snow depth is 0 over entire period, return NaN.

    Parameters
    ----------
    snd : xarray.DataArray
        Surface snow depth.
    freq : str
         Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The day of year at which snow depth reaches its maximum value.
    """
    # Identify periods where there is at least one non-null value for snow depth
    valid = at_least_n_valid(snd.where(snd > 0), n=1, freq=freq)

    # Compute doymax. Will return first time step if all snow depths are 0.
    out = generic.select_resample_op(snd.where(snd > 0, 0), op=generic.doymax, freq=freq)
    out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(snd))

    # Mask arrays that miss at least one non-null snd.
    return out.where(~valid)




[docs]
@declare_units(snw="[mass]/[area]")
def snw_max(snw: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray:
    """
    Maximum snow amount.

    The maximum daily snow amount.

    Parameters
    ----------
    snw : xarray.DataArray
        Snow amount (mass per area).
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The maximum snow amount over a given number of days for each period. [mass/area].
    """
    return generic.select_resample_op(snw, op="max", freq=freq)




[docs]
@declare_units(snw="[mass]/[area]")
def snw_max_doy(snw: xarray.DataArray, freq: str = "YS-JUL") -> xarray.DataArray:
    """
    Day of year of maximum snow amount.

    Day of year when surface snow amount reaches its peak value. If snow amount is 0 over entire period, return NaN.

    Parameters
    ----------
    snw : xarray.DataArray
        Surface snow amount.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The day of year at which snow amount reaches its maximum value.
    """
    # Identify periods where there is at least one non-null value for snow depth
    valid = at_least_n_valid(snw.where(snw > 0), n=1, freq=freq)

    # Compute doymax. Will return first time step if all snow depths are 0.
    out = generic.select_resample_op(snw.where(snw > 0, 0), op=generic.doymax, freq=freq)
    out.attrs.update(units="", is_dayofyear=np.int32(1), calendar=get_calendar(snw))

    # Mask arrays that miss at least one non-null snd.
    return out.where(~valid)




[docs]
@declare_units(snw="[mass]/[area]")
def snow_melt_we_max(snw: xarray.DataArray, window: int = 3, freq: str = "YS-JUL") -> xarray.DataArray:
    """
    Maximum snow melt.

    The maximum snow melt over a given number of days expressed in snow water equivalent.

    Parameters
    ----------
    snw : xarray.DataArray
        Snow amount (mass per area).
    window : int
        Number of days during which the melt is accumulated.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The maximum snow melt over a given number of days for each period. [mass/area].
    """
    # Compute change in SWE. Set melt as a positive change.
    dsnw = snw.diff(dim="time") * -1

    # Sum over window
    agg = dsnw.rolling(time=window).sum()

    # Max over period
    out = agg.resample(time=freq).max(dim="time")
    out.attrs["units"] = snw.units
    return out




[docs]
@declare_units(snw="[mass]/[area]", pr="[precipitation]")
def melt_and_precip_max(
    snw: xarray.DataArray, pr: xarray.DataArray, window: int = 3, freq: str = "YS-JUL"
) -> xarray.DataArray:
    """
    Maximum snow melt and precipitation.

    The maximum snow melt plus precipitation over a given number of days expressed in snow water equivalent.

    Parameters
    ----------
    snw : xarray.DataArray
        Snow amount (mass per area).
    pr : xarray.DataArray
        Daily precipitation flux.
    window : int
        Number of days during which the water input is accumulated.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        The maximum snow melt plus precipitation over a given number of days for each period. [mass/area].
    """
    # Compute change in SWE. Set melt as a positive change.
    dsnw = snw.diff(dim="time") * -1

    # Add precipitation total
    total = rate2amount(pr) + dsnw

    # Sum over window
    agg = total.rolling(time=window).sum()

    # Max over period
    out = agg.resample(time=freq).max(dim="time")
    out.attrs["units"] = snw.units
    return out




[docs]
@declare_units(
    gwl="[length]",
    params="[]",
)
def standardized_groundwater_index(
    gwl: xarray.DataArray,
    freq: str | None = "MS",
    window: int = 1,
    dist: str | rv_continuous = "genextreme",
    method: str = "ML",
    fitkwargs: dict | None = None,
    cal_start: DateStr | None = None,
    cal_end: DateStr | None = None,
    params: Quantified | None = None,
    **indexer,
) -> xarray.DataArray:
    r"""
    Standardized Groundwater Index (SGI).

    Parameters
    ----------
    gwl : xarray.DataArray
        Groundwater head level.
    freq : str, optional
        Resampling frequency. A monthly or daily frequency is expected. Option `None` assumes
        that the desired resampling has already been applied input dataset and will skip the resampling step.
    window : int
        Averaging window length relative to the resampling frequency. For example, if `freq="MS"`,
        i.e. a monthly resampling, the window is an integer number of months.
    dist : {"gamma", "genextreme", "lognorm"} or `rv_continuous`
        Name of the univariate distribution, or a callable `rv_continuous` (see :py:mod:`scipy.stats`).
    method : {"APP", "ML", "PWM"}
        Name of the fitting method, such as `ML` (maximum likelihood), `APP` (approximate).
        The approximate method uses a deterministic function that does not involve any optimization.
        `PWM` should be used with a `lmoments3` distribution.
    fitkwargs : dict, optional
        Kwargs passed to ``xclim.indices.stats.fit`` used to impose values of certain parameters (`floc`, `fscale`).
    cal_start : DateStr, optional
        Start date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`.
        Default option `None` means that the calibration period begins at the start of the input dataset.
    cal_end : DateStr, optional
        End date of the calibration period. A `DateStr` is expected, that is a `str` in format `"YYYY-MM-DD"`.
        Default option `None` means that the calibration period finishes at the end of the input dataset.
    params : xarray.DataArray, optional
        Fit parameters.
        The `params` can be computed using ``xclim.indices.stats.standardized_index_fit_params`` in advance.
        The output can be given here as input, and it overrides other options.
    **indexer : Indexer
        Indexing parameters to compute the indicator on a temporal subset of the data.
        It accepts the same arguments as :py:func:`xclim.indices.generic.select_time`.

    Returns
    -------
    xarray.DataArray, [unitless]
        Standardized Groundwater Index.

    See Also
    --------
    xclim.indices._agro.standardized_precipitation_index : Standardized Precipitation Index.
    xclim.indices.stats.standardized_index : Standardized Index.
    xclim.indices.stats.standardized_index_fit_params : Standardized Index Fit Params.

    Notes
    -----
    * N-month SGI / N-day SGI is determined by choosing the `window = N` and the appropriate frequency `freq`.
    * Supported statistical distributions are: ["gamma", "genextreme", "lognorm"].
    * If `params` is provided, it overrides the `cal_start`, `cal_end`, `freq`, `window`, `dist`, `method` options.
    * "APP" method only supports two-parameter distributions. Parameter `loc` needs to be fixed to use method "APP".

    References
    ----------
    :cite:cts:`bloomfield_2013`

    Examples
    --------
    >>> from datetime import datetime
    >>> from xclim.indices import standardized_groundwater_index
    >>> ds = xr.open_dataset(path_to_gwl_file)
    >>> gwl = ds.gwl
    >>> cal_start, cal_end = "1980-05-01", "1982-06-01"
    >>> sgi_3 = standardized_groundwater_index(
    ...     gwl,
    ...     freq="MS",
    ...     window=3,
    ...     dist="gamma",
    ...     method="ML",
    ...     cal_start=cal_start,
    ...     cal_end=cal_end,
    ... )  # Computing SGI-3 months using a Gamma distribution for the fit
    >>> # Fitting parameters can also be obtained first, then reused as input.
    >>> from xclim.indices.stats import standardized_index_fit_params
    >>> params = standardized_index_fit_params(
    ...     gwl.sel(time=slice(cal_start, cal_end)),
    ...     freq="MS",
    ...     window=3,
    ...     dist="gamma",
    ...     method="ML",
    ... )  # First getting params
    >>> sgi_3 = standardized_groundwater_index(gwl, params=params)
    """
    fitkwargs = fitkwargs or {}

    dist_methods = {
        "gamma": ["ML", "APP"],
        # FIXME: xclim-v1 — remove "APP"
        "genextreme": ["ML", "APP"],
        "lognorm": ["ML", "APP"],
    }
    if isinstance(dist, str):
        if dist in dist_methods:
            if method not in dist_methods[dist]:
                raise NotImplementedError(f"{method} method is not implemented for {dist} distribution")
        else:
            raise NotImplementedError(f"{dist} distribution is not yet implemented.")

    zero_inflated = False
    sgi = standardized_index(
        gwl,
        freq=freq,
        window=window,
        dist=dist,
        method=method,
        zero_inflated=zero_inflated,
        fitkwargs=fitkwargs,
        cal_start=cal_start,
        cal_end=cal_end,
        params=params,
        **indexer,
    )

    return sgi




[docs]
@declare_units(q="[discharge]")
def flow_index(q: xarray.DataArray, p: float = 0.95) -> xarray.DataArray:
    """
    Flow index.

    Calculate the pth percentile of daily streamflow normalized by the median flow.

    Parameters
    ----------
    q : xarray.DataArray
        Daily streamflow data.
    p : float
        Percentile for calculating the flow index, between 0 and 1. Default of 0.95 is for high flows.

    Returns
    -------
    xarray.DataArray
        Normalized Qp, which is the p th percentile of daily streamflow normalized by the median flow.

    References
    ----------
    :cite:cts:`Clausen2000`
    """
    qp = q.quantile(p, dim="time")
    q_median = q.median(dim="time")
    out = qp / q_median
    out.attrs["units"] = "1"
    return out




[docs]
@declare_units(q="[discharge]")
def high_flow_frequency(q: xarray.DataArray, threshold_factor: int = 9, freq: str = "YS-OCT") -> xarray.DataArray:
    """
    High flow frequency.

    Calculate the number of days in a given period with flows greater than a specified threshold, given as a
    multiple of the median flow. By default, the period is the water year starting on 1st October and ending on
    30th September, as commonly defined in North America.

    Parameters
    ----------
    q : xarray.DataArray
        Daily streamflow data.
    threshold_factor : int
        Factor by which the median flow is multiplied to set the high flow threshold, default is 9.
    freq : str
        Resampling frequency, default is 'YS-OCT' for water year starting in October and ending in September.

    Returns
    -------
    xarray.DataArray
        Number of high flow days.

    References
    ----------
    :cite:cts:`addor2018,Clausen2000`
    """
    median_flow = q.median(dim="time")
    threshold = threshold_factor * median_flow
    out = threshold_count(q, ">", threshold, freq=freq)
    return to_agg_units(out, q, "count", deffreq="D")




[docs]
@declare_units(q="[discharge]")
def low_flow_frequency(q: xarray.DataArray, threshold_factor: float = 0.2, freq: str = "YS-OCT") -> xarray.DataArray:
    """
    Low flow frequency.

    Calculate the number of days in a given period with flows lower than a specified threshold, given by a fraction
    of the mean flow. By default, the period is the water year starting on 1st October and ending on 30th September,
    as commonly defined in North America.

    Parameters
    ----------
    q : xarray.DataArray
        Daily streamflow data.
    threshold_factor : float
        Factor by which the mean flow is multiplied to set the low flow threshold, default is 0.2.
    freq : str
        Resampling frequency, default is 'YS-OCT' for water year starting in October and ending in September.

    Returns
    -------
    xarray.DataArray
        Number of low flow days.

    References
    ----------
    :cite:cts:`Olden2003`
    """
    mean_flow = q.mean(dim="time")
    threshold = threshold_factor * mean_flow
    out = threshold_count(q, "<", threshold, freq=freq)
    return to_agg_units(out, q, "count", deffreq="D")




[docs]
@declare_units(pr="[precipitation]")
def antecedent_precipitation_index(pr: xarray.DataArray, window: int = 7, p_exp: float = 0.935) -> xarray.DataArray:
    """
    Antecedent Precipitation Index.

    Calculate the running weighted sum of daily precipitation values given a window and weighting exponent.
    This index serves as an indicator for soil moisture.

    Parameters
    ----------
    pr : xarray.DataArray
        Daily precipitation data.
    window : int
        Window for the days of precipitation data to be weighted and summed, default is 7.
    p_exp : float
        Weighting exponent, default is 0.935.

    Returns
    -------
    xarray.DataArray
        Antecedent Precipitation Index.

    References
    ----------
        :cite:cts:`schroter2015,li2021`
    """
    pr = rate2amount(pr)
    pr = convert_units_to(pr, "mm", context="hydro")
    weights = xarray.DataArray(
        list(reversed([p_exp ** (idx - 1) for idx in range(1, window + 1)])),
        dims="window_dim",
    )
    out = pr.rolling(time=window).construct("window_dim").dot(weights)
    out.attrs["units"] = "mm"
    return out




[docs]
def runoff_ratio(
    q: xarray.DataArray,
    a: xarray.DataArray,
    pr: xarray.DataArray,
    freq: str = "YS",
) -> xarray.DataArray:
    """
    Runoff ratio.

    Runoff ratio: Ratio of runoff volume measured at the stream to the total
    precipitation volume over the watershed.
    Temporal analysis: Yearly values computed from seasonal daily data and yearly data, depending on chosen frequency.
    (e.g., 'YS' for yearly starting Jan, or 'QS-DEC' for seasons,
    '30YS' to compute the value over slices of 30 years from the start of the time series).

    Parameters
    ----------
    q : xarray.DataArray
        Streamflow in discharge units. Will be converted to [m³/s].
    a : xarray.DataArray
        Watershed area in area units. Will be converted to [km²].
    pr : xarray.DataArray
        Mean daily precipitation in precipitation units. Will be converted to [mm/hr].
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray
        Out: xarray.DataArray
            Runoff ratio (dimensionless).

    Notes
    -----
    - Runoff ratio values are comparable to runoff coefficients.
    - Values near 0 mean most precipitation infiltrates watershed soil
      or is lost to evapotranspiration.
    - Values near 1 mean most precipitation leaves the watershed as runoff.
      Possible causes are impervious surfaces from urban sprawl, thin soils,
      steep slopes, etc.
    - Annual runoff ratios are typically ≤ 1.
    - Annual runoff ratios are typically higher than summer runoff ratios due to
      higher levels of evapotranspiration in summer months.
    - For snow-driven watersheds, spring runoff ratios are typically higher than
      annual runoff ratios, as snowmelt generates concentrated runoff events.

    References
    ----------
    :cite:cts:'knoben_2024'
    """
    q = convert_units_to(q, "m3/s")
    a = convert_units_to(a, "km2")
    pr = convert_units_to(pr, "mm/hr")

    runoff = q * 3.6 / a  # unit conversion for runoff in mm/hr : 3.6[s/hr *km2/m2]
    runoff_freq = runoff.resample(time=freq).sum()
    pr_freq = pr.resample(time=freq).sum()

    out = runoff_freq / pr_freq
    out.attrs["units"] = ""
    return out




[docs]
@declare_units(swe="[length]", thresh="[length]")
def days_with_snowpack(
    swe: xarray.DataArray,
    thresh: str = "10 mm",
    freq: str = "YS-OCT",
) -> xarray.DataArray:
    """
    Days with snowpack.

    Number of days with snow water equivalent (SWE) above a given threshold.

    Parameters
    ----------
    swe : xarray.DataArray
        Daily surface snow amount as snow water equivalent.
    thresh : float
        Minimum snow quantity to consider a given day snow-covered. Default is 10 mm.
    freq : str
        Resampling frequency. Typically the water year starting on the 1st of October
        in the Northern Hemisphere.

    Returns
    -------
    xarray.DataArray
        Number of days with snowpack above the threshold.

    Notes
    -----
    Years with larger snowpacks tend to produce bigger spring floods.
    Additional spring flood analysis can be carried out using the
    ``annual_maxima`` and ``lag_snowpack_flow_peaks`` functions.

    References
    ----------
    :cite:cts:`alonso_gonzalez_2022`
    """
    frz = convert_units_to(thresh, swe)
    out = threshold_count(swe, ">", frz, freq)
    return to_agg_units(out, swe, "count", deffreq="D")




[docs]
@declare_units(pr="[precipitation]", pet="[precipitation]")
def aridity_index(pr: xarray.DataArray, pet: xarray.DataArray, freq: str = "YS") -> xarray.DataArray:
    """
    Aridity index.

    The ratio of total precipitation over potential evapotranspiration.
    Classification based on the Aridity Index (AI).
    +----------------+----------------+-----------------+
    | Classification | Aridity Index  | Global land area|
    +----------------+----------------+-----------------+
    | Hyperarid      | AI < 0.05      | 7.5%            |
    | Arid           | 0.05 ≤ AI < 0.20 | 12.1%         |
    | Semi-arid      | 0.20 ≤ AI < 0.50 | 17.7%         |
    | Dry subhumid   | 0.50 ≤ AI < 0.65 | 9.9%          |
    +----------------+----------------+-----------------+

    Parameters
    ----------
    pr : array_like
        Precipitation.
    pet : array_like
        Potential evapotranspiration.
    freq : str
        Resampling frequency. A monthly or yearly frequency is expected.

    Returns
    -------
    float
        Aridity index per time step (Unitless).

    Notes
    -----
    - An aridity index below 0.65 indicates an arid environment,
      while values above this threshold correspond to more humid environments.
    - In North America, higher aridity index values can be associated with colder climates
      due to lower evapotranspiration, even when precipitation is limited or occurring as snow.

    References
    ----------
    :cite:cts:'zomer_2022'
    """
    pr = pr.resample(time=freq).sum()
    pet = pet.resample(time=freq).sum()
    ai = pr / pet
    ai.attrs["units"] = ""

    return ai




[docs]
@declare_units(swe="[length]", q="[discharge]")
def lag_snowpack_flow_peaks(
    swe: xarray.DataArray,
    q: xarray.DataArray,
    freq: str = "YS-OCT",
    percentile: int = 90,
) -> xarray.DataArray:
    """
    Time lag between maximum snowpack and river high flows.

    Number of days between the annual maximum snowpack, measured by the snow water
    equivalent, and the mean date when river flow exceeds a percentile threshold
    during a given year.
    If the time lag between maximum snowpack and river high flows is ≤ 50 days,
    the watershed is likely in a nival regime.

    Parameters
    ----------
    swe : xarray.DataArray
        Surface snow amount as snow water equivalent.
    q : xarray.DataArray
        Streamflow.
    freq : str
        Resampling frequency. Defaults to the water year starting on the 1st of October.
    percentile : float
        Percentile threshold identifying high flows. Defaults to the 90th percentile.

    Returns
    -------
    xarray.DataArray
        Number of days between maximum snowpack and the circular mean date of high flow days.

    See Also
    --------
    xclim.indices.rb_flashiness_index: Richards-Baker flashiness index.

    Notes
    -----
    - The default ``freq`` is the water year used in the Northern Hemisphere, from October to September.
    - It is recommended to have at least 70% of valid data per water year in order to compute significant values.
    - Nival regime is characterized by a hydrological response dominated by snowmelt,
      where maximum flows occur shortly after peak snow cover (Burn et al., 2010).
    - The 50-day threshold is approximate and depends on the specific responsiveness of each watershed.
    - A negative value means the high flows occur before the peak snow cover.

    References
    ----------
    :cite:cts:`burn_2010`
    """
    # Find time of max SWE per year
    t_swe_max = swe.resample(time=freq).map(lambda x: x.idxmax())  # if x.max() > 0 else np.nan)
    t_swe_max = t_swe_max.where(swe.resample(time=freq).max() > 0)
    doy_swe_max = t_swe_max.dt.dayofyear

    # Compute percentile threshold per water year using resample
    thresh = q.resample(time="YS-OCT").reduce(
        np.nanpercentile, q=percentile, dim="time"
    )  # the second q, equal to percentile, is a keyword in np.nanpercentile, not the flow variable.
    threshold_for_each_time = thresh.reindex_like(q, method="ffill")
    q_high = q.where(q >= threshold_for_each_time).dropna(dim="time", how="all")

    # Day of year for high flow peaks
    doy = q_high.time.dt.dayofyear

    t_q_max = doy.resample(time=freq).reduce(partial(circmean, high=366, low=1), dim="time")

    # Compute lag
    lag = t_q_max - doy_swe_max
    lag.attrs["units"] = "days"
    return lag




[docs]
@declare_units(q="[discharge]")
def sen_slope(
    q: xarray.DataArray,
    qsim: xarray.DataArray = None,
) -> xarray.Dataset:
    """
    Sen Slope : Temporal robustness analysis of streamflow.

    Computes annual and seasonal Theil–Sen slope estimators and performs the
    Mann–Kendall test for trend evaluation.

    Parameters
    ----------
    q : xarray.DataArray
        Observed streamflow vector.
    qsim : xarray.DataArray
        Simulated streamflow vector.

    Returns
    -------
    xarray.Dataset
        Dataset containing the following variables:

        - ``Sen_slope`` : Sen's slope estimates for seasonal and yearly averages.
        - ``p_value`` : Mann–Kendall metric indicating slope tendency.
        - If simulated flows are provided: ``Sen_slope_sim``, ``p_value_sim``,
          and the ratio of observed ``Sen_slope`` over simulated ``Sen_slope``.

    Notes
    -----
    - If p-value <= 0.05, the trend is statistically significant at the 5% level.
    - The ratio of observed Sen_slope over simulated Sen_slope is considered
      acceptable within the range 0.5–2 and is optimal when equal to 1
      (Sauquet et al., 2025).

    References
    ----------
    :cite:cts:`sauquet_2025`
    """
    seasons = ["DJF", "MAM", "JJA", "SON", "Year"]

    def compute_seasonal_stats(x: xarray.DataArray) -> tuple[list, list]:
        """
        Seasonal statistics.

        Parameters
        ----------
        x : xarray.DataArray
            Time series of streamflow.

        Returns
        -------
        tuple of list
            Lists containing the following variables:

            - ``Sen_slope`` : Sen's slope estimates for seasonal and yearly averages.
            - ``p_value`` : Mann–Kendall metric indicating slope tendency.
        """
        if mk is None:
            msg = f"{sen_slope.__name__} requires access to the `pymannkendall` library."
            raise ModuleNotFoundError(msg)

        # Convert to pandas Series with DatetimeIndex
        x_year = x.resample(time="YS-DEC").mean()
        x_season = x.resample(time="QS-DEC").mean()

        x_series = x_season.to_series()

        # Create a MultiIndex: year + season (0–3)
        season_index = (
            x_series.index.month % 12 // 3  # 0 for DJF, 1 for MAM, etc.
        )
        x_df = pd.DataFrame({"value": x_series.values, "season": season_index, "year": x_series.index.year})
        #  Pivot to shape (n_years, 4 seasons)
        df_seasons = x_df.pivot(index="season", columns="year", values="value")

        # rename columns
        df_seasons.index = ["DJF", "MAM", "JJA", "SON"]

        ss_DJF = mk.original_test(df_seasons.iloc[0])
        ss_MAM = mk.original_test(df_seasons.iloc[1])
        ss_JJA = mk.original_test(df_seasons.iloc[2])
        ss_SON = mk.original_test(df_seasons.iloc[3])
        ss_an = mk.original_test(x_year)

        _slopes = [ss_DJF.slope, ss_MAM.slope, ss_JJA.slope, ss_SON.slope, ss_an.slope]
        _p_vals = [ss_DJF.p, ss_MAM.p, ss_JJA.p, ss_SON.p, ss_an.p]

        return _slopes, _p_vals

    if qsim is not None:
        slopes, p_vals = compute_seasonal_stats(q)
        slopes_sim, p_vals_sim = compute_seasonal_stats(qsim)
        slopes_np = np.array(slopes)
        slopes_sim_np = np.array(slopes_sim)
        ratio = slopes_np / slopes_sim_np
        ds = xarray.Dataset(
            data_vars={
                "Sen_slope_obs": ("season", slopes),
                "p_value_obs": ("season", p_vals),
                "Sen_slope_sim": ("season", slopes_sim),
                "p_value_sim": ("season", p_vals_sim),
                "ratio": ("season", ratio),
            },
            coords={"season": seasons},
        )

    else:
        slopes, p_vals = compute_seasonal_stats(q)
        # Create labeled xarray
        ds = xarray.Dataset(
            data_vars={"Sen_slope": ("season", slopes), "p_value": ("season", p_vals)}, coords={"season": seasons}
        )

    # Assign empty units to all variables
    for var in ds.data_vars:
        ds[var].attrs["units"] = ""

    return ds




[docs]
@declare_units(q="[discharge]")
def base_flow_index_seasonal_ratio(
    q: xarray.DataArray, freq: str = "QS-DEC"
) -> tuple[DataArray, DataArray, DataArray, DataArray, DataArray]:
    """
    Seasonal Base flow index (bfi) and ratio of winter to summer base flow index.

    Return yearly base flow index per season, defined as the minimum 7-day average flow divided by the mean flow
    as well as yearly winter to summer bfi ratio.

    Parameters
    ----------
    q : xarray.DataArray
        Rate of river discharge.
    freq : str
        Resampling frequency.

    Returns
    -------
    xarray.DataArray, [dimensionless]
        Winter_bfi.
    xarray.DataArray, [dimensionless]
        Spring_bfi.
    xarray.DataArray, [dimensionless]
        Summer_bfi.
    xarray.DataArray, [dimensionless]
        Fall_bfi.
    xarray.DataArray, [dimensionless]
        Winter_to summer_ratio.

    Notes
    -----
    It is recommended to have at least 70% of valid data per month in order to compute significant values.

    References
    ----------
    :cite:cts:`singh_2019`
    :cite:cts:`jaffres_2021`
    """
    # 7-day minimum of raw daily flow
    q7min = q.rolling(time=7, center=True).min(skipna=False).resample(time=freq).min()

    qmean = q.resample(time=freq).mean()
    bfi = q7min / qmean

    winter_bfi = bfi.where(bfi["time"].dt.month == 12).groupby("time.year").mean(dim="time")
    winter_bfi.attrs["units"] = ""

    spring_bfi = bfi.where(bfi["time"].dt.month == 3).groupby("time.year").mean(dim="time")
    spring_bfi.attrs["units"] = ""

    summer_bfi = bfi.where(bfi["time"].dt.month == 6).groupby("time.year").mean(dim="time")
    summer_bfi.attrs["units"] = ""

    fall_bfi = bfi.where(bfi["time"].dt.month == 9).groupby("time.year").mean(dim="time")
    fall_bfi.attrs["units"] = ""

    # Shift timestamp forward by one year since winter starts in dec the year prior
    winter_1 = winter_bfi.assign_coords(year=winter_bfi["year"] + 1)
    winter_1_aligned = winter_1.reindex(year=summer_bfi.year)
    epsilon = 1e-3  # To avoid division by zero

    w_s_ratio = (winter_1_aligned / (summer_bfi + epsilon)).sel(year=summer_bfi.year)
    w_s_ratio = w_s_ratio.isel(year=slice(1, None))
    w_s_ratio.attrs["units"] = ""

    return winter_bfi, spring_bfi, summer_bfi, fall_bfi, w_s_ratio