{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This cell is not visible when the documentation is built.\n",
    "\n",
    "from __future__ import annotations\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import xarray as xr\n",
    "from scipy.interpolate import interp1d\n",
    "\n",
    "# Workaround for determining the notebook folder within a running notebook\n",
    "try:\n",
    "    from _finder import _find_current_folder\n",
    "\n",
    "    notebook_folder = _find_current_folder()\n",
    "except ImportError:\n",
    "    from pathlib import Path\n",
    "\n",
    "    notebook_folder = Path().cwd()\n",
    "\n",
    "pd.plotting.register_matplotlib_converters()\n",
    "\n",
    "data_folder = notebook_folder / \"data\"\n",
    "data_folder.mkdir(exist_ok=True)\n",
    "\n",
    "# time vector on 4 years\n",
    "times = pd.date_range(\"2000-01-01\", \"2003-12-31\", freq=\"D\")\n",
    "# temperature data as seasonal cycle -18 to 18\n",
    "tas = xr.DataArray(\n",
    "    -18 * np.cos(2 * np.pi * times.dayofyear / 365),\n",
    "    dims=(\"time\",),\n",
    "    coords={\"time\": times},\n",
    "    name=\"tas\",\n",
    "    attrs={\n",
    "        \"units\": \"degC\",\n",
    "        \"standard_name\": \"air_temperature\",\n",
    "        \"long_name\": \"Mean air temperature at surface\",\n",
    "    },\n",
    ")\n",
    "\n",
    "# write 10 members adding cubic-smoothed gaussian noise of wave number 43 and amplitude 20\n",
    "# resulting temp will oscillate between -18 and 38\n",
    "for i in range(10):\n",
    "    tasi = tas + 20 * interp1d(np.arange(43), np.random.random((43,)), kind=\"quadratic\")(np.linspace(0, 42, tas.size))\n",
    "    tasi.name = \"tas\"\n",
    "    tasi.attrs.update(tas.attrs)\n",
    "    tasi.attrs[\"title\"] = f\"tas of member {i:02d}\"\n",
    "    tasi.to_netcdf(data_folder.joinpath(f\"ens_tas_m{i}.nc\"))\n",
    "\n",
    "# Create 'toy' criteria selection data\n",
    "np.random.normal(loc=3.5, scale=1.5, size=50)\n",
    "# crit['delta_annual_tavg']\n",
    "np.random.seed(0)\n",
    "test = xr.DataArray(np.random.normal(loc=3, scale=1.5, size=100), dims=[\"realization\"]).assign_coords(\n",
    "    horizon=\"2041-2070\"\n",
    ")\n",
    "test = xr.concat(\n",
    "    (\n",
    "        test,\n",
    "        xr.DataArray(np.random.normal(loc=5.34, scale=2, size=100), dims=[\"realization\"]).assign_coords(\n",
    "            horizon=\"2071-2100\"\n",
    "        ),\n",
    "    ),\n",
    "    dim=\"horizon\",\n",
    ")\n",
    "\n",
    "ds_crit = xr.Dataset()\n",
    "\n",
    "ds_crit[\"delta_annual_tavg\"] = test\n",
    "test = xr.DataArray(np.random.normal(loc=5, scale=5, size=100), dims=[\"realization\"]).assign_coords(horizon=\"2041-2070\")\n",
    "test = xr.concat(\n",
    "    (\n",
    "        test,\n",
    "        xr.DataArray(np.random.normal(loc=10, scale=8, size=100), dims=[\"realization\"]).assign_coords(\n",
    "            horizon=\"2071-2100\"\n",
    "        ),\n",
    "    ),\n",
    "    dim=\"horizon\",\n",
    ")\n",
    "ds_crit[\"delta_annual_prtot\"] = test\n",
    "test = xr.DataArray(np.random.normal(loc=0, scale=3, size=100), dims=[\"realization\"]).assign_coords(horizon=\"2041-2070\")\n",
    "test = xr.concat(\n",
    "    (\n",
    "        test,\n",
    "        xr.DataArray(np.random.normal(loc=2, scale=4, size=100), dims=[\"realization\"]).assign_coords(\n",
    "            horizon=\"2071-2100\"\n",
    "        ),\n",
    "    ),\n",
    "    dim=\"horizon\",\n",
    ")\n",
    "ds_crit[\"delta_JJA_prtot\"] = test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Ensembles\n",
    "=========\n",
    "\n",
    "An important aspect of climate models is that they are run multiple times with some initial perturbations to see how they replicate the natural variability of the climate. Through [xclim.ensembles](../api.rst#ensembles-module), xclim provides an easy interface to compute ensemble statistics on different members. Most methods perform checks and conversion on top of simpler `xarray` methods, providing an easier interface to use.\n",
    "\n",
    "### create_ensemble\n",
    "Our first step is to create an ensemble. This method takes a list of files defining the same variables over the same coordinates and concatenates them into one dataset with an added dimension `realization`.\n",
    "\n",
    "Using `xarray` a very simple way of creating an ensemble dataset would be :\n",
    "```python\n",
    "import xarray\n",
    "\n",
    "xarray.open_mfdataset(files, concat_dim='realization')\n",
    "```\n",
    "\n",
    "However, this is only successful when the dimensions of all the files are identical AND only if the calendar type of each netcdf file is the same\n",
    "\n",
    "xclim's `create_ensemble()` method overcomes these constraints, selecting the common time period to all files and assigns a standard calendar type to the dataset.\n",
    "\n",
    "<div class=\"alert alert-info\">\n",
    "\n",
    "Input netcdf files still require equal spatial dimension size (e.g. lon, lat dimensions).\n",
    "\n",
    "</div>\n",
    "\n",
    "Given files all named `ens_tas_m[member number].nc`, we use `glob` to get a list of all those files."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "import xarray as xr\n",
    "\n",
    "from xclim import ensembles\n",
    "\n",
    "# Set display to HTML style (for fancy output)\n",
    "xr.set_options(display_style=\"html\", display_width=50)\n",
    "\n",
    "%matplotlib inline\n",
    "\n",
    "ens = ensembles.create_ensemble(data_folder.glob(\"ens_tas_m*.nc\")).load()\n",
    "ens.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.style.use(\"seaborn-v0_8-dark\")\n",
    "plt.rcParams[\"figure.figsize\"] = (13, 5)\n",
    "ens.tas.plot(hue=\"realization\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ens.tas  # Attributes of the first dataset to be opened are copied to the final output"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Ensemble statistics\n",
    "Beyond creating an ensemble dataset, the `xclim.ensembles` module contains functions for calculating statistics between realizations\n",
    "\n",
    "**Ensemble mean, standard-deviation, max & min**\n",
    "\n",
    "In the example below, we use xclim's `ensemble_mean_std_max_min()` to calculate statistics across the 10 realizations in our test dataset. Output variables are created combining the original variable name `tas` with additional ending indicating the statistic calculated on the realization dimension : `_mean`, `_stdev`, `_min`, `_max`\n",
    "\n",
    "The resulting output now contains 4 derived variables from the original single variable in our ensemble dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ens_stats = ensembles.ensemble_mean_std_max_min(ens)\n",
    "ens_stats"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Ensemble percentiles\n",
    "\n",
    "Here, we use xclim's `ensemble_percentiles()` to calculate percentile values across the 10 realizations.\n",
    "The output has now a `percentiles` dimension instead of `realization`. Split variables can be created instead, by specifying `split=True` (the variable name `tas` will be appended with `_p{x}`). Compared to NumPy's `percentile()` and xarray's `quantile()`, this method handles more efficiently dataset with invalid values and the chunking along the realization dimension (which is automatic when dask arrays are used)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ens_perc = ensembles.ensemble_percentiles(ens, values=[15, 50, 85], split=False)\n",
    "ens_perc"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "ax.fill_between(\n",
    "    ens_stats.time.values,\n",
    "    ens_stats.tas_min,\n",
    "    ens_stats.tas_max,\n",
    "    alpha=0.3,\n",
    "    label=\"Min-Max\",\n",
    ")\n",
    "ax.fill_between(\n",
    "    ens_perc.time.values,\n",
    "    ens_perc.tas.sel(percentiles=15),\n",
    "    ens_perc.tas.sel(percentiles=85),\n",
    "    alpha=0.5,\n",
    "    label=\"Perc. 15-85\",\n",
    ")\n",
    "ax._get_lines.get_next_color()  # Hack to get different line\n",
    "ax._get_lines.get_next_color()\n",
    "ax.plot(ens_stats.time.values, ens_stats.tas_mean, linewidth=2, label=\"Mean\")\n",
    "ax.plot(ens_perc.time.values, ens_perc.tas.sel(percentiles=50), linewidth=2, label=\"Median\")\n",
    "ax.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Change significance and model agreement\n",
    "\n",
    "When communicating climate change through plots of projected change, it is often useful to add information on the statistical significance of the values. A common way to represent this information without overloading the figures is through hatching patterns superimposed on the primary data. Two aspects are usually shown:\n",
    "\n",
    "- change significance: whether most of the ensemble members project a statistically significant climate change signal, in comparison to their internal variability.\n",
    "- model agreement: whether the different ensemble members agree on the sign of the change.\n",
    "\n",
    "We can then divide the plotted points into categories each with its own hatching pattern, usually leaving the robust data (models agree and enough show a significant change) without hatching.\n",
    "\n",
    "Xclim provides some tools to help in generating these hatching masks. First is [xc.ensembles.robustness_fractions](../apidoc/xclim.ensembles.rst#xclim.ensembles._robustness.robustness_fractions) that can characterize the change significance and sign agreement across ensemble members. To demonstrate its usage, we'll first generate some fake annual mean temperature data. Here, `ref` is the data on the reference period and `fut` is a future projection. There are five (5) different members in the ensemble. We tweaked the generation so that all models agree on significant change in the \"South\" while agreement and significance of change decreases as we go North and East."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import xarray as xr\n",
    "from matplotlib.patches import Rectangle\n",
    "\n",
    "xr.set_options(keep_attrs=True)\n",
    "\n",
    "# Reference period\n",
    "ref = xr.DataArray(\n",
    "    20 * np.random.random_sample((5, 30, 10, 10)) + 275,\n",
    "    dims=(\"realization\", \"time\", \"lat\", \"lon\"),\n",
    "    coords={\n",
    "        \"time\": xr.date_range(\"1990\", periods=30, freq=\"YS\"),\n",
    "        \"lat\": np.arange(40, 50),\n",
    "        \"lon\": np.arange(-70, -60),\n",
    "    },\n",
    "    attrs={\"units\": \"K\"},\n",
    ")\n",
    "\n",
    "# Future\n",
    "fut = xr.DataArray(\n",
    "    20 * np.random.random_sample((5, 30, 10, 10)) + 275,\n",
    "    dims=(\"realization\", \"time\", \"lat\", \"lon\"),\n",
    "    coords={\n",
    "        \"time\": xr.date_range(\"2070\", periods=30, freq=\"YS\"),\n",
    "        \"lat\": np.arange(40, 50),\n",
    "        \"lon\": np.arange(-70, -60),\n",
    "    },\n",
    "    attrs={\"units\": \"K\"},\n",
    ")\n",
    "# Add change.\n",
    "fut = fut + xr.concat(\n",
    "    [xr.DataArray(np.linspace(15, north_delta, num=10), dims=(\"lat\",)) for north_delta in [15, 10, 0, -7, -10]],\n",
    "    \"realization\",\n",
    ")\n",
    "\n",
    "deltas = (fut.mean(\"time\") - ref.mean(\"time\")).assign_attrs(long_name=\"Temperature change\")\n",
    "mean_delta = deltas.mean(\"realization\")\n",
    "deltas.plot(col=\"realization\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Change significance can be determined in a lot of different ways. Xclim provides some simple and some more complicated statistical test in `robustness_fractions`. In this example, we'll follow the suggestions found in the Cross-Chapter Box 1 of the [IPCC Atlas chapter (AR6, WG1)](https://doi.org/10.1017/9781009157896.021). Specifically, we are following Approach C, using the alternative for when pre-industrial control data is not available.\n",
    "\n",
    "We first compute the different fractions for each robustness aspect."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fractions = ensembles.robustness_fractions(fut, ref, test=\"ipcc-ar6-c\")\n",
    "fractions"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In this output we have:\n",
    "\n",
    "- `changed` : The fraction of members showing significant change.\n",
    "- `positive` : The fraction of members showing positive change, no matter if it is significant or not.\n",
    "- `changed_positive` : The fraction of members showing significant AND positive change.\n",
    "- `agree` : The fraction of members agreeing on the sign of change. This is the maximum between `positive` and `1 - positive`.\n",
    "- `valid` : The fraction of \"valid\" members. A member is valid is there are no NaNs along the time axes of `fut` and  `ref`. In our case, it is 1 everywhere.\n",
    "\n",
    "For example, here's the plot of the fraction of members showing significant change."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fractions.changed.plot(figsize=(6, 4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Xclim provides all this so that one can construct their own robustness maps the way they want. Often, hatching overlays are based on categories defined by some thresholds on the significant change and agreement fractions. The [`xclim.ensembles.robustness_categories`](../apidoc/xclim.ensembles.rst#xclim.ensembles._robustness.robustness_categories) function helps for that common case and defaults to the categories and thresholds used by the IPCC in its Atlas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "robustness = ensembles.robustness_categories(fractions)\n",
    "robustness"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The output is a categorical map following the \"flag variables\" CF conventions. Parameters needed for plotting are found in the attributes."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "robustness.plot(figsize=(6, 4))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Matplotlib doesn't provide an easy way of plotting categorial data with a proper legend, so our real plotting script is a bit more complicated, but xclim's output makes it easier."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cmap = mpl.colors.ListedColormap([\"none\"])  # So we can deactivate pcolor's colormapping\n",
    "\n",
    "fig, ax = plt.subplots(figsize=(6, 4))\n",
    "mean_delta.plot(ax=ax)\n",
    "# For each flag value plot the corresponding hatch.\n",
    "for val, ha in zip(robustness.flag_values, [None, \"\\\\\\\\\\\\\", \"xxx\"], strict=False):\n",
    "    ax.pcolor(\n",
    "        robustness.lon,\n",
    "        robustness.lat,\n",
    "        robustness.where(robustness == val),\n",
    "        hatch=ha,\n",
    "        cmap=cmap,\n",
    "    )\n",
    "\n",
    "ax.legend(\n",
    "    handles=[\n",
    "        Rectangle((0, 0), 2, 2, fill=False, hatch=h, label=lbl)\n",
    "        for h, lbl in zip([\"\\\\\\\\\\\\\", \"xxx\"], robustness.flag_descriptions[1:], strict=False)\n",
    "    ],\n",
    "    bbox_to_anchor=(0.0, 1.1),\n",
    "    loc=\"upper left\",\n",
    "    ncols=2,\n",
    ");"
   ]
  }
 ],
 "metadata": {
  "celltoolbar": "Edit Metadata",
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}