Source code for xclim.core.datachecks

"""
Data Checks
===========

Utilities designed to check the validity of data inputs.
"""

from __future__ import annotations

from collections.abc import Sequence

import xarray as xr

from .calendar import compare_offsets, parse_offset
from .options import datacheck
from .utils import ValidationError


@datacheck
def check_freq(var: xr.DataArray, freq: str | Sequence[str], strict: bool = True):
    """Raise an error if the series does not have the expected temporal frequency.

    The frequency is inferred from ``var.time`` with :py:func:`xarray.infer_freq`.
    A series whose frequency cannot be inferred is rejected; this presumably
    includes series that are not monotonically increasing (to be confirmed
    against xarray's inference rules).

    Parameters
    ----------
    var : xr.DataArray
        Input array. It must have a `time` coordinate.
    freq : str or sequence of str
        The expected temporal frequencies, using Pandas frequency terminology
        ({'Y', 'M', 'D', 'h', 'min', 's', 'ms', 'us'}) and multiples thereof.
        To test strictly for 'W', pass '7D' with `strict=True`.
        This ignores the start/end flag and the anchor (ex: 'YS-JUL' will validate against 'Y').
    strict : bool
        Whether multiples of the frequencies are considered invalid or not.
        With `strict` set to False, a '3h' series will not raise an error if freq is set to 'h'.

    Raises
    ------
    ValidationError
        - If the frequency of `var` is not inferrable.
        - If the frequency of `var` does not match the requested `freq`.
    """
    # Normalize a single frequency string to a one-element list.
    if isinstance(freq, str):
        freq = [freq]

    # Only the base unit (second element of the parsed offset) is compared first.
    exp_base = [parse_offset(frq)[1] for frq in freq]

    v_freq = xr.infer_freq(var.time)
    if v_freq is None:
        raise ValidationError(
            "Unable to infer the frequency of the time series. "
            "To mute this, set xclim's option data_validation='log'."
        )

    v_base = parse_offset(v_freq)[1]
    # The base must be one of the expected ones; in strict mode the full
    # frequency must additionally compare equal to at least one expected entry.
    if v_base not in exp_base or (
        strict and all(compare_offsets(v_freq, "!=", frq) for frq in freq)
    ):
        # Keep the separating space inside the qualifier so the non-strict
        # message does not contain a doubled space ("not  in").
        qualifier = "strictly " if strict else ""
        raise ValidationError(
            f"Frequency of time series not {qualifier}in {freq}. "
            "To mute this, set xclim's option data_validation='log'."
        )
def check_daily(var: xr.DataArray):
    """Raise an error if the series has a frequency other than daily.

    This is a thin wrapper that delegates to :py:func:`check_freq` with the
    daily frequency code, and returns whatever that call returns.

    Parameters
    ----------
    var : xr.DataArray
        Input array, forwarded unchanged to `check_freq`.

    Notes
    -----
    This does not check for gaps in series.
    """
    daily = "D"
    return check_freq(var, daily)
@datacheck
def check_common_time(inputs: Sequence[xr.DataArray]):
    """Raise an error if the list of inputs doesn't have a single common frequency.

    Parameters
    ----------
    inputs : Sequence of xr.DataArray
        Input arrays. Each must have a `time` coordinate and a `time` index.

    Raises
    ------
    ValidationError
        - if the frequency of any input can't be inferred
        - if inputs have different frequencies (an empty `inputs` also ends up
          here, since it yields zero distinct frequencies)
        - if inputs have a daily or hourly frequency, but they are not given at the same time of day.
    """
    # Check all have the same freq
    freqs = [xr.infer_freq(da.time) for da in inputs]
    if None in freqs:
        raise ValidationError(
            "Unable to infer the frequency of the time series. "
            "To mute this, set xclim's option data_validation='log'."
        )
    if len(set(freqs)) != 1:
        raise ValidationError(
            f"Inputs have different frequencies. Got : {freqs}. "
            "To mute this, set xclim's option data_validation='log'."
        )

    # Check if anchor is the same
    freq = freqs[0]
    base = parse_offset(freq)[1]
    # For hourly data only the minutes of the first timestamp are compared;
    # for daily data the hour and minutes are compared.
    fmt = {"h": ":%M", "D": "%H:%M"}
    if base in fmt:
        outs = {da.indexes["time"][0].strftime(fmt[base]) for da in inputs}
        if len(outs) > 1:
            raise ValidationError(
                f"All inputs have the same frequency ({freq}), but they are not anchored on the same minutes (got {outs}). "
                f"xarray's alignment would silently fail. You can try to fix this with `da.resample('{freq}').mean()`. "
                "To mute this, set xclim's option data_validation='log'."
            )