[1]:

# This cell is not visible when the documentation is built.

from __future__ import annotations

import numpy as np
import pandas as pd
import xarray as xr
from scipy.interpolate import interp1d

# Determine the folder of the running notebook. If the `_finder` helper cannot
# be imported, fall back to the current working directory.
try:
    from _finder import _find_current_folder

    notebook_folder = _find_current_folder()
except ImportError:
    from pathlib import Path

    notebook_folder = Path.cwd()

pd.plotting.register_matplotlib_converters()

# Folder receiving the files generated below; created if it does not exist yet.
data_folder = notebook_folder / "data"
data_folder.mkdir(exist_ok=True)

# Daily time axis covering the four years 2000 through 2003
times = pd.date_range(start="2000-01-01", end="2003-12-31", freq="D")

# Synthetic temperature: a cosine seasonal cycle spanning -18 to 18 degC,
# coldest at the start of each year.
tas_attrs = {
    "units": "degC",
    "standard_name": "air_temperature",
    "long_name": "Mean air temperature at surface",
}
seasonal_cycle = -18 * np.cos(2 * np.pi * times.dayofyear / 365)
tas = xr.DataArray(
    seasonal_cycle,
    dims=("time",),
    coords={"time": times},
    name="tas",
    attrs=tas_attrs,
)

# Write 10 members, each the seasonal cycle plus smooth noise: 43 uniform random
# knots in [0, 1) are interpolated with a quadratic spline onto the daily time
# axis and scaled by 20, so members range roughly between -18 and 38.
# A dedicated seeded generator makes the written files identical on every run
# without touching NumPy's global random state.
noise_rng = np.random.default_rng(0)
n_knots = 43
knot_positions = np.arange(n_knots)
daily_positions = np.linspace(0, n_knots - 1, tas.size)
for i in range(10):
    smooth_noise = interp1d(knot_positions, noise_rng.random(n_knots), kind="quadratic")(daily_positions)
    tasi = tas + 20 * smooth_noise
    # Re-apply the name and attributes of the base series to the member.
    tasi.name = "tas"
    tasi.attrs.update(tas.attrs)
    tasi.attrs["title"] = f"tas of member {i:02d}"
    tasi.to_netcdf(data_folder.joinpath(f"ens_tas_m{i}.nc"))

# Create 'toy' criteria selection data: for every criterion, 100 realizations
# are drawn from a normal distribution for each of two horizons.
# Each entry maps horizon -> (loc, scale). The dict order fixes the order of the
# random draws, so the values depend only on the seed below.
np.random.seed(0)
toy_criteria = {
    "delta_annual_tavg": {"2041-2070": (3, 1.5), "2071-2100": (5.34, 2)},
    "delta_annual_prtot": {"2041-2070": (5, 5), "2071-2100": (10, 8)},
    "delta_JJA_prtot": {"2041-2070": (0, 3), "2071-2100": (2, 4)},
}

ds_crit = xr.Dataset()
for crit_name, horizon_params in toy_criteria.items():
    # `test` is kept as the working name so the cell leaves the same variables
    # bound as before (it ends up holding the last criterion).
    test = xr.concat(
        [
            xr.DataArray(
                np.random.normal(loc=loc, scale=scale, size=100),
                dims=["realization"],
            ).assign_coords(horizon=horizon)
            for horizon, (loc, scale) in horizon_params.items()
        ],
        dim="horizon",
    )
    ds_crit[crit_name] = test

Ensembles¶

An important aspect of climate models is that they are run multiple times with some initial perturbations to see how they replicate the natural variability of the climate. Through xclim.ensembles, xclim provides an easy interface to compute ensemble statistics on different members. Most methods perform checks and conversion on top of simpler xarray methods, providing an easier interface to use.

create_ensemble¶

Our first step is to create an ensemble. This method takes a list of files defining the same variables over the same coordinates and concatenates them into one dataset with an added dimension realization.

Using xarray, a very simple way of creating an ensemble dataset would be:

import xarray

xarray.open_mfdataset(files, concat_dim='realization')

However, this only succeeds when the dimensions of all the files are identical AND the calendar type of each netCDF file is the same.

xclim’s create_ensemble() method overcomes these constraints by selecting the time period common to all files and assigning a standard calendar type to the dataset.

Input netcdf files still require equal spatial dimension size (e.g. lon, lat dimensions).

Given files all named ens_tas_m[member number].nc, we use glob to get a list of all those files.

[2]:

import matplotlib as mpl
import matplotlib.pyplot as plt
import xarray as xr

from xclim import ensembles

# Set display to HTML style (for fancy output)
xr.set_options(display_style="html", display_width=50)

%matplotlib inline

ens = ensembles.create_ensemble(data_folder.glob("ens_tas_m*.nc")).load()
ens.close()

[3]:

# Figure styling applied from this cell onwards
plt.style.use("seaborn-v0_8-dark")
plt.rcParams.update({"figure.figsize": (13, 5)})

# One line per ensemble member
ens["tas"].plot(hue="realization")
plt.show()

[4]:

# Attributes of the first dataset to be opened are copied to the final output
ens["tas"]

[4]:

<xarray.DataArray 'tas' (realization: 10,
                         time: 1461)> Size: 117kB
array([[ -6.99305501,  -7.05764555,  -7.11798815, ...,  -9.37197873,
         -9.18335896,  -8.98229947],
       [-12.17678179, -12.44634819, -12.69841108, ..., -16.40999576,
        -16.6504312 , -16.88546336],
       [ -7.37020569,  -7.01344325,  -6.66907168, ...,  -7.1740622 ,
         -7.75008265,  -8.34245866],
       ...,
       [-12.64382953, -12.25179616, -11.86104235, ...,  -9.78096737,
        -10.10148413, -10.42904503],
       [-11.68304385, -11.43173508, -11.18442258, ..., -13.75999645,
        -14.62949591, -15.52168949],
       [-16.22188524, -15.99864981, -15.77549606, ..., -12.52048684,
        -12.32357507, -12.1169032 ]], shape=(10, 1461))
Coordinates:
  * realization  (realization) int64 80B 0 1 ... 9
  * time         (time) datetime64[ns] 12kB 200...
Attributes:
    units:          degC
    standard_name:  air_temperature
    long_name:      Mean air temperature at sur...
    title:          tas of member 06

Ensembles¶

create_ensemble¶

Ensemble statistics¶

Ensemble percentiles¶

Change significance and model agreement¶