
# noqa: D205,D400
"""
Uncertainty Partitioning
========================

This module implements methods and tools meant to partition climate projection uncertainties into different components.
"""


from __future__ import annotations

import numpy as np
import pandas as pd
import xarray as xr

"""
Implemented partitioning algorithms:

 - `hawkins_sutton`
 - `lafferty_sriver`

# References for other more recent algorithms that could be added here.

Yip, S., Ferro, C. A. T., Stephenson, D. B., and Hawkins, E. (2011). A Simple, Coherent Framework for Partitioning
Uncertainty in Climate Predictions. Journal of Climate, 24(17), 4634-4643, https://doi.org/10.1175/2011JCLI4085.1

Northrop, P. J., & Chandler, R. E. (2014). Quantifying sources of uncertainty in projections of future climate.
Journal of Climate, 27(23), 8793–8808, https://doi.org/10.1175/JCLI-D-14-00265.1

Goldenson, N., Mauger, G., Leung, L. R., Bitz, C. M., & Rhines, A. (2018). Effects of ensemble configuration on
estimates of regional climate uncertainties. Geophysical Research Letters, 45, 926–934.
https://doi.org/10.1002/2017GL076297

Lehner, F., Deser, C., Maher, N., Marotzke, J., Fischer, E. M., Brunner, L., Knutti, R., and Hawkins,
E. (2020). Partitioning climate projection uncertainty with multiple large ensembles and CMIP5/6, Earth Syst. Dynam.,
11, 491–508, https://doi.org/10.5194/esd-11-491-2020.

Evin, G., Hingray, B., Blanchet, J., Eckert, N., Morin, S., & Verfaillie, D. (2019). Partitioning Uncertainty
Components of an Incomplete Ensemble of Climate Projections Using Data Augmentation, Journal of Climate, 32(8),
2423-2440, https://doi.org/10.1175/JCLI-D-18-0606.1

Beigi, E., Tsai, F. T.-C., Singh, V. P., & Kao, S.-C. (2019). Bayesian Hierarchical Model Uncertainty Quantification
for Future Hydroclimate Projections in Southern Hills-Gulf Region, USA. Water, 11(2), 268.
https://doi.org/10.3390/w11020268

Related bibtex entries:
 - yip_2011
 - northrop_2014
 - goldenson_2018
 - lehner_2020
 - evin_2019
"""

# TODO: Add ref for Brekke and Barsugli (2013)


def hawkins_sutton(
    da: xr.DataArray,
    sm: xr.DataArray | None = None,
    weights: xr.DataArray | None = None,
    baseline: tuple[str, str] = ("1971", "2000"),
    kind: str = "+",
) -> tuple[xr.DataArray, xr.DataArray]:
    """Return the mean and partitioned variance of an ensemble based on the method from Hawkins & Sutton (2009).

    Parameters
    ----------
    da : xr.DataArray
        Time series with dimensions 'time', 'scenario' and 'model'.
    sm : xr.DataArray, optional
        Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a
        4th-order polynomial. Results are sensitive to the choice of smoothing function; use this to set another
        polynomial order, or a LOESS curve.
    weights : xr.DataArray, optional
        Weights to be applied to individual models. Should have a `model` dimension.
    baseline : (str, str)
        Start and end year of the reference period.
    kind : {'+', '*'}
        Whether the mean over the reference period should be subtracted (+) or divided by (*).

    Returns
    -------
    xr.DataArray, xr.DataArray
        The mean relative to the baseline, and the components of variance of the ensemble. These components are
        coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, and `total`.

    Notes
    -----
    To prepare input data, make sure `da` has dimensions `time`, `scenario` and `model`,
    e.g. `da.rename({"scen": "scenario"})`.

    To reproduce results from :cite:t:`hawkins_2009`, input data should meet the following requirements:

    - annual time series starting in 1950 and ending in 2100;
    - the same models are available for all scenarios.

    To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
    output.

    References
    ----------
    :cite:cts:`hawkins_2009,hawkins_2011`
    """
    if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
        raise ValueError("This algorithm expects annual time series.")

    if not {"time", "scenario", "model"}.issubset(da.dims):
        raise ValueError(
            "DataArray dimensions should include 'time', 'scenario' and 'model'."
        )

    # Confirm that the same models have data for all scenarios.
    check = da.notnull().any("time").all("scenario")
    if not check.all():
        raise ValueError(f"Some models are missing data for some scenarios: \n {check}")

    if weights is None:
        weights = xr.ones_like(da.model, float)

    if sm is None:
        # Fit a 4th-order polynomial to smooth out natural fluctuations.
        # Note that the order of the polynomial has a substantial influence on the results.
        fit = da.polyfit(dim="time", deg=4, skipna=True)
        sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
            da.notnull()
        )

    # Decadal mean residuals
    res = (da - sm).rolling(time=10, center=True).mean()

    # Individual model variance after 2000: V
    # Note that the historical data is the same for all scenarios.
    nv_u = (
        res.sel(time=slice("2000", None))
        .var(dim=("scenario", "time"))
        .weighted(weights)
        .mean("model")
    )

    # Compute the baseline average
    ref = sm.sel(time=slice(*baseline)).mean(dim="time")

    # Remove the baseline average from the smoothed time series
    if kind == "+":
        sm -= ref
    elif kind == "*":
        sm /= ref
    else:
        raise ValueError(kind)

    # Model uncertainty: M(t)
    model_u = sm.weighted(weights).var(dim="model").mean(dim="scenario")

    # Scenario uncertainty: S(t)
    scenario_u = sm.weighted(weights).mean(dim="model").var(dim="scenario")

    # Total uncertainty: T(t)
    total = nv_u + scenario_u + model_u

    # Create the output array with the uncertainty components
    u = pd.Index(["variability", "model", "scenario", "total"], name="uncertainty")
    uncertainty = xr.concat([nv_u, model_u, scenario_u, total], dim=u)

    # Keep a trace of the elements contributing to each uncertainty component
    for d in ["model", "scenario"]:
        uncertainty.attrs[d] = da[d].values

    # Mean projection: G(t)
    g = sm.weighted(weights).mean(dim="model").mean(dim="scenario")

    return g, uncertainty

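# Usage sketch (illustrative only, not part of the original module): build a small synthetic
# annual ensemble and partition its variance with `hawkins_sutton`. The dimension sizes,
# scenario and model labels, and random data below are assumptions made for demonstration.
def _example_hawkins_sutton() -> tuple[xr.DataArray, xr.DataArray]:
    rng = np.random.default_rng(0)
    time = pd.date_range("1950-01-01", "2100-01-01", freq="YS")  # annual time steps
    da = xr.DataArray(
        rng.normal(size=(len(time), 3, 5)),
        dims=("time", "scenario", "model"),
        coords={
            "time": time,
            "scenario": ["ssp126", "ssp245", "ssp585"],
            "model": [f"model_{i}" for i in range(5)],
        },
    )
    # `g` is the mean projection relative to the 1971-2000 baseline; `u` holds the variance
    # components along the `uncertainty` dimension (variability, model, scenario, total).
    g, u = hawkins_sutton(da, baseline=("1971", "2000"))
    return g, u
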
def hawkins_sutton_09_weighting(da, obs, baseline=("1971", "2000")):
    """Return weights according to the ability of models to simulate observed climate change.

    Weights are computed by comparing the 2000 value to the baseline mean:
    w_m = 1 / (x_{obs} + |x_{m,2000} - x_{obs}|)

    Parameters
    ----------
    da : xr.DataArray
        Input data over the historical period. Should have a time and model dimension.
    obs : float
        Observed change.
    baseline : (str, str)
        Baseline start and end year.

    Returns
    -------
    xr.DataArray
        Weights over the model dimension.
    """
    mm = da.sel(time=slice(*baseline)).mean("time")
    xm = da.sel(time=baseline[1]) - mm
    xm = xm.drop_vars("time").squeeze()
    return 1 / (obs + np.abs(xm - obs))

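# Usage sketch (illustrative only): derive model weights from a hypothetical historical
# array `da_hist` and feed them to `hawkins_sutton`. The arrays and the observed change
# `obs` are placeholders to be supplied by the user.
def _example_weighted_hawkins_sutton(
    da: xr.DataArray, da_hist: xr.DataArray, obs: float
) -> tuple[xr.DataArray, xr.DataArray]:
    # `da_hist` covers the historical period only, with `time` and `model` dimensions.
    w = hawkins_sutton_09_weighting(da_hist, obs, baseline=("1971", "2000"))
    return hawkins_sutton(da, weights=w, baseline=("1971", "2000"))
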
def lafferty_sriver(
    da: xr.DataArray,
    sm: xr.DataArray | None = None,
    bb13: bool = False,
) -> tuple[xr.DataArray, xr.DataArray]:
    """Return the mean and partitioned variance of an ensemble based on the method from Lafferty and Sriver (2023).

    Parameters
    ----------
    da : xr.DataArray
        Time series with dimensions 'time', 'scenario', 'downscaling' and 'model'.
    sm : xr.DataArray, optional
        Smoothed time series over time, with the same dimensions as `da`. By default, this is estimated using a
        4th-order polynomial. Results are sensitive to the choice of smoothing function; use this to set another
        polynomial order, or a LOESS curve.
    bb13 : bool
        Whether to apply the Brekke and Barsugli (2013) method to estimate scenario uncertainty, where the variance
        over scenarios is computed before taking the mean over models and downscaling methods.

    Returns
    -------
    xr.DataArray, xr.DataArray
        The mean relative to the baseline, and the components of variance of the ensemble. These components are
        coordinates along the `uncertainty` dimension: `variability`, `model`, `scenario`, `downscaling` and `total`.

    Notes
    -----
    To prepare input data, make sure `da` has dimensions `time`, `scenario`, `downscaling` and `model`,
    e.g. `da.rename({"experiment": "scenario"})`.

    To get the fraction of the total variance instead of the variance itself, call `fractional_uncertainty` on the
    output.

    References
    ----------
    :cite:cts:`Lafferty2023`
    """
    if xr.infer_freq(da.time)[0] not in ["A", "Y"]:
        raise ValueError("This algorithm expects annual time series.")

    if not {"time", "scenario", "model", "downscaling"}.issubset(da.dims):
        raise ValueError(
            "DataArray dimensions should include 'time', 'scenario', 'downscaling' and 'model'."
        )

    if sm is None:
        # Fit a 4th-order polynomial
        fit = da.polyfit(dim="time", deg=4, skipna=True)
        sm = xr.polyval(coord=da.time, coeffs=fit.polyfit_coefficients).where(
            da.notnull()
        )

    # "Interannual variability is then estimated as the centered rolling 11-year variance of the difference
    # between the extracted forced response and the raw outputs, averaged over all outputs."
    nv_u = (
        (da - sm)
        .rolling(time=11, center=True)
        .var()
        .mean(dim=["scenario", "model", "downscaling"])
    )

    # Scenario uncertainty: U_s(t)
    if bb13:
        scenario_u = sm.var(dim="scenario").mean(dim=["model", "downscaling"])
    else:
        scenario_u = sm.mean(dim=["model", "downscaling"]).var(dim="scenario")

    # Model uncertainty: U_m(t)
    # Count the number of parent models that have been downscaled using method $d$ for scenario $s$.
    # In the paper, weights are constant; here they may vary across time if there are missing values.
    mw = sm.count("model")
    # In https://github.com/david0811/lafferty-sriver_2023_npjCliAtm/blob/main/unit_test/lafferty_sriver.py
    # weights are set to zero when there is only one model, but the variance of a single element is 0 anyway.
    model_u = sm.var(dim="model").weighted(mw).mean(dim=["scenario", "downscaling"])

    # Downscaling uncertainty: U_d(t)
    dw = sm.count("downscaling")
    downscaling_u = (
        sm.var(dim="downscaling").weighted(dw).mean(dim=["scenario", "model"])
    )

    # Total uncertainty: T(t)
    total = nv_u + scenario_u + model_u + downscaling_u

    # Create the output array with the uncertainty components
    u = pd.Index(
        ["model", "scenario", "downscaling", "variability", "total"], name="uncertainty"
    )
    uncertainty = xr.concat([model_u, scenario_u, downscaling_u, nv_u, total], dim=u)

    # Keep a trace of the elements contributing to each uncertainty component
    for d in ["model", "scenario", "downscaling"]:
        uncertainty.attrs[d] = da[d].values

    # Mean projection:
    # This is not part of the original algorithm, but we want all partition algorithms to have similar outputs.
    g = sm.mean(dim="model").mean(dim="scenario").mean(dim="downscaling")

    return g, uncertainty

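# Usage sketch (illustrative only): partition variance across scenario, model, downscaling
# and internal variability with `lafferty_sriver`. Dimension sizes, labels and random data
# are assumptions made for demonstration.
def _example_lafferty_sriver() -> tuple[xr.DataArray, xr.DataArray]:
    rng = np.random.default_rng(1)
    time = pd.date_range("1950-01-01", "2100-01-01", freq="YS")  # annual time steps
    da = xr.DataArray(
        rng.normal(size=(len(time), 2, 4, 3)),
        dims=("time", "scenario", "model", "downscaling"),
        coords={
            "time": time,
            "scenario": ["ssp245", "ssp585"],
            "model": [f"model_{i}" for i in range(4)],
            "downscaling": ["method_a", "method_b", "method_c"],
        },
    )
    # Setting `bb13=True` would instead compute the variance over scenarios before averaging
    # over models and downscaling methods (Brekke and Barsugli, 2013).
    g, u = lafferty_sriver(da)
    return g, u
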
def fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
    """Return the fractional uncertainty.

    Parameters
    ----------
    u : xr.DataArray
        Array with uncertainty components along the `uncertainty` dimension.

    Returns
    -------
    xr.DataArray
        Fractional, or relative, uncertainty with respect to the total uncertainty.
    """
    uncertainty = u / u.sel(uncertainty="total") * 100
    uncertainty.attrs.update(u.attrs)
    uncertainty.attrs["long_name"] = "Fraction of total variance"
    uncertainty.attrs["units"] = "%"
    return uncertainty

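# Usage sketch (illustrative only): convert the variance components returned by either
# partitioning function into percentages of the total variance.
def _example_fractional_uncertainty(u: xr.DataArray) -> xr.DataArray:
    # `u` is the second output of `hawkins_sutton` or `lafferty_sriver`.
    frac = fractional_uncertainty(u)
    # For instance, `frac.sel(uncertainty="scenario")` gives the share (in %) of the total
    # variance attributable to scenario uncertainty at each time step.
    return frac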