Source code for netcdf_scm.normalisation.running_mean

"""
Module for the normaliser which calculates anomalies from a running mean in the pre-industrial control run
"""
import logging

from scmdata import ScmRun

from ..cli_utils import _check_timesteps_are_monthly
from .base import Normaliser

LOGGER = logging.getLogger(__name__)


[docs]class NormaliserRunningMean(Normaliser): """ Normaliser which calculates anomalies from a running mean in the pre-industrial control run Each normalisation value is an n-year mean, centred on the equivalent point in the pre-industrial control simulation. If there is insufficient data to create a full n-year window at the edge of the simulation then a linear extrapolation of the running-mean is used to extend the normalisation values to cover the required full range. """ def __init__(self, nyears=21): """ Intialise Parameters ---------- nyears : int Number of years to use in the running-mean. """ self._nyears = nyears self._method_name = "{}-yr-running-mean".format(nyears) def _get_reference_values(self, indata, picontrol, picontrol_branching_time): running_mean_n_years = self._nyears months_per_year = 12 window_width = running_mean_n_years * months_per_year window_roll_back = window_width // 2 # the window extends one step fewer forward because the centre time point # is also included window_roll_forward = window_roll_back - 1 branch_year = picontrol_branching_time.year branch_month = picontrol_branching_time.month picontrol_branch_col = picontrol.filter(year=branch_year, month=branch_month)[ "time" ][0] picontrol_time = picontrol["time"] picontrol_branch_idx = (picontrol_time == picontrol_branch_col).idxmax() # relevant times go from the branch point minus the number of steps each # rolling mean window extends backward up to the branch point plus the # length of ``indata`` plus the number of steps the rolling mean window # extends forward relevant_time_points = picontrol_time[ max(picontrol_branch_idx - window_roll_back, 0) : picontrol_branch_idx + indata["time"].shape[0] + window_roll_forward ].tolist() _check_timesteps_are_monthly(picontrol) # quick sanity check # super slow option below, would be good to have direct functionality in scmdata # normalise_ts = picontrol.filter(time=relevant_time_points.tolist()).timeseries() picontrol_times = (picontrol["time"] >= relevant_time_points[0]) & ( picontrol["time"] <= relevant_time_points[-1] ) normalise_ts = picontrol.timeseries().iloc[:, picontrol_times.values] normalise_df = normalise_ts.rolling( window=window_width, center=True, axis="columns", ).mean() keep_time_points = picontrol_time[ picontrol_branch_idx : picontrol_branch_idx + indata["time"].shape[0] ].tolist() raise_error = False try: normalise_df = normalise_df[keep_time_points] except KeyError: raise_error = True if normalise_df.isnull().any().any(): LOGGER.info( "Filling gaps in running mean (where not enough values were available to create a full " "window) with linear extrapolations." ) target_times = normalise_df.columns normalise_df = ( ScmRun(normalise_df) .interpolate(target_times=target_times, extrapolation_type="linear") .timeseries() ) if normalise_df.isnull().any().any(): # if there are any remaining gaps, raise raise_error = True try: normalise_df.columns = indata["time"] except ValueError: raise_error = True if raise_error: error_msg = ( "Only `{:04d}{:02d}` to `{:04d}{:02d}` is available in the " "{} data. Given the branching time, `{:04d}{:02d}`, we need " "data from ~`{:04d}` to `{:04d}`. {} data in {}".format( picontrol["time"].min().year, picontrol["time"].min().month, picontrol["time"].max().year, picontrol["time"].max().month, picontrol.metadata["experiment_id"], picontrol_branching_time.year, picontrol_branching_time.month, picontrol_branching_time.year - 11, picontrol_branching_time.year + indata["year"].max() - indata["year"].min() + 11, picontrol.metadata["experiment_id"], picontrol.metadata["netcdf-scm crunched file"], ) ) raise ValueError(error_msg) return normalise_df