From 9dc9fe769d9a94d908ff97fcf55960d39011502b Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Fri, 20 Sep 2024 17:24:54 -0400
Subject: [PATCH] update notebooks to work with changes to dask chunking

---
 ...tting_and_bias_correcting_CMIP6_data.ipynb |  99 +++++----
 ...e_change_impact_study_on_a_watershed.ipynb | 200 +++++++++---------
 2 files changed, 155 insertions(+), 144 deletions(-)

diff --git a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
index cf02d991..9adf89c6 100644
--- a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
+++ b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
@@ -53,7 +53,6 @@
     "import xclim\n",
     "import xclim.sdba as sdba\n",
     "from clisops.core import average, subset\n",
-    "\n",
     "from ravenpy.utilities.testdata import get_file\n",
     "\n",
     "tmp = Path(tempfile.mkdtemp())"
@@ -85,10 +84,10 @@
    },
    "outputs": [],
    "source": [
-    "# We get the basin contour for testing on a server. You can replace the getfile method by a string containing the path\n",
-    "# to your own geojson\n",
+    "# We get the basin contour for testing on a server.\n",
+    "# You can replace the getfile method by a string containing the path to your own geojson.\n",
     "\n",
-    "# Get basin contour\n",
+    "# Get basin contour.\n",
     "basin_contour = get_file(\"notebook_inputs/input.geojson\")\n",
     "\n",
     "reference_start_day = dt.datetime(1980, 12, 31)\n",
@@ -158,15 +157,16 @@
    },
    "outputs": [],
    "source": [
-    "# Prepare the filesystem that allows reading data. Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
+    "# Prepare the filesystem that allows reading data.\n",
+    "# Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
     "fsCMIP = gcsfs.GCSFileSystem(token=\"anon\", access=\"read_only\")\n",
     "\n",
-    "# Get the catalog info from the pangeo dataset, which basically is a list of links to the various products.\n",
+    "# Get the catalog info from the PANGEO dataset, which basically is a list of links to the various products.\n",
     "col = intake.open_esm_datastore(\n",
     "    \"https://storage.googleapis.com/cmip6/pangeo-cmip6.json\"\n",
     ")\n",
     "\n",
-    "# Print the contents of the catalog, so we can see the classification system\n",
+    "# Print the contents of the catalog, so we can see the classification system.\n",
     "display(col)"
    ]
   },
@@ -185,7 +185,8 @@
    },
    "outputs": [],
    "source": [
-    "# Get the list of models. Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
+    "# Get the list of models.\n",
+    "# Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
     "list(col.df.source_id.unique())"
    ]
   },
@@ -225,11 +226,10 @@
     "    member_id=\"r1i1p1f1\",\n",
     "    source_id=climate_model,\n",
     ")\n",
-    "col_subset = col.search(\n",
-    "    require_all_on=[\"source_id\"], **query\n",
-    ")  # Command that will return the filtered list\n",
+    "# Return the filtered list.\n",
+    "col_subset = col.search(require_all_on=[\"source_id\"], **query)\n",
     "\n",
-    "# Show the filtered list:\n",
+    "# Show the filtered list.\n",
     "display(col_subset.df)"
    ]
   },
@@ -249,7 +249,7 @@
    },
    "outputs": [],
    "source": [
-    "# Get the object locator object\n",
+    "# Get the object locator object.\n",
     "mapper = fsCMIP.get_mapper(col_subset.df.zstore[0])"
    ]
   },
@@ -277,17 +277,20 @@
    },
    "outputs": [],
    "source": [
-    "# Get the CMIP6 data from Google Cloud and read it in memory using xarray. This is done via \"lazy loading\" and is not actually reading the data in memory\n",
-    "# yet, but is keeping track of what it will need to get, eventually.\n",
+    "# Get the CMIP6 data from Google Cloud and read it in memory using xarray.\n",
+    "# This is done via \"lazy loading\" and is not actually reading the data in memory yet, but is keeping track of what it will need to get, eventually.\n",
     "ds = xr.open_zarr(mapper, consolidated=True)\n",
     "\n",
-    "# Convert to numpy.datetime64 object to be compatbile\n",
-    "if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
-    "    ds = ds.convert_calendar(\"standard\")\n",
+    "# Convert to numpy.datetime64 object for compatibility.\n",
+    "ds = ds.convert_calendar(\"standard\")\n",
     "\n",
-    "# Extract only the dates that we really want. Again, this is done via lazy loading, and is not actually using memory at this point.\n",
+    "# Extract only the dates that we really want.\n",
+    "# Again, this is done via lazy loading, and is not actually using memory at this point.\n",
     "ds = ds.sel(time=slice(reference_start_day, reference_end_day))\n",
     "\n",
+    "# Set the date to the midnight of the given day.\n",
+    "ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
     "# Use the clisops subsetting tools to extract the data for the watershed boundaries and take the spatial average\n",
     "ds = average.average_shape(ds, basin_contour)\n",
     "\n",
@@ -322,14 +325,16 @@
     "    with xr.set_options(keep_attrs=True):\n",
     "        ds = xr.open_zarr(mapper, consolidated=True)\n",
     "\n",
-    "        # Convert to numpy.datetime64 object to be compatbile\n",
-    "        if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
-    "            ds = ds.convert_calendar(\"standard\")\n",
+    "        # Convert to numpy.datetime64 object for compatibility.\n",
+    "        ds = ds.convert_calendar(\"standard\")\n",
     "\n",
-    "        # Compute average over region\n",
+    "        # Set the date to the midnight of the given day.\n",
+    "        ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
+    "        # Compute the average over region.\n",
     "        out = average.average_shape(ds.sel(time=slice(start, end)), geometry)\n",
     "\n",
-    "        # Convert geometry variables into attributes\n",
+    "        # Convert geometry variables into attributes.\n",
     "        attrs = {\n",
     "            key: out[key].values.item()\n",
     "            for key in out.coords\n",
@@ -431,9 +436,15 @@
     "    ERA5_reference = subset.subset_shape(\n",
     "        ds.sel(time=slice(reference_start_day, reference_end_day)), basin_contour\n",
     "    ).mean({\"latitude\", \"longitude\"})\n",
-    "    ERA5_tmin = ERA5_reference[\"t2m\"].resample(time=\"1D\").min().chunk(-1, -1, -1)\n",
-    "    ERA5_tmax = ERA5_reference[\"t2m\"].resample(time=\"1D\").max().chunk(-1, -1, -1)\n",
-    "    ERA5_pr = ERA5_reference[\"tp\"].resample(time=\"1D\").sum().chunk(-1, -1, -1)"
+    "    ERA5_tmin = (\n",
+    "        ERA5_reference.t2m.resample(time=\"1D\")\n",
+    "        .min()\n",
+    "        .chunk(\n",
+    "            time=-1,\n",
+    "        )\n",
+    "    )\n",
+    "    ERA5_tmax = ERA5_reference.t2m.resample(time=\"1D\").max().chunk(time=-1)\n",
+    "    ERA5_pr = ERA5_reference.tp.resample(time=\"1D\").sum().chunk(time=-1)"
    ]
   },
@@ -455,8 +466,8 @@
    },
    "outputs": [],
    "source": [
-    "# Here we need to make sure that our units are all in the correct format. You can play around with the tools we've seen thus far to explore the units\n",
-    "# and make sure everything is consistent.\n",
+    "# Here we need to make sure that our units are all in the correct format.\n",
+    "# You can play around with the tools we've seen thus far to explore the units and make sure everything is consistent.\n",
     "\n",
     "# Let's start with precipitation:\n",
     "ERA5_pr = xclim.core.units.convert_units_to(ERA5_pr, \"mm\", context=\"hydro\")\n",
@@ -497,7 +508,7 @@
    },
    "outputs": [],
    "source": [
-    "# Use xclim utilities (sbda) to give information on the type of window used for the bias correction.\n",
+    "# Use xclim utilities (SDBA) to give information on the type of window used for the bias correction.\n",
     "group_month_window = sdba.utils.Grouper(\"time.dayofyear\", window=15)\n",
     "\n",
     "# This is an adjusting function. It builds the tool that will perform the corrections.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_pr, hist=historical_pr, nquantiles=50, kind=\"+\", group=group_month_window\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_precip = Adjustment.adjust(historical_pr, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_precip = Adjustment.adjust(future_pr, interp=\"linear\")\n",
     "\n",
-    "# Ensure that the precipitation is non-negative, which can happen with some climate models\n",
+    "# Ensure that the precipitation is non-negative, which can happen with some climate models.\n",
     "corrected_ref_precip = corrected_ref_precip.where(corrected_ref_precip > 0, 0)\n",
     "corrected_fut_precip = corrected_fut_precip.where(corrected_fut_precip > 0, 0)\n",
     "\n",
-    "# Train the model to find the correction factors for the maximum temperature (tasmax) data\n",
+    "# Train the model to find the correction factors for the maximum temperature (tasmax) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_tmax,\n",
     "    hist=historical_tasmax,\n",
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
@@ -524,13 +535,13 @@
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_tasmax = Adjustment.adjust(historical_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_tasmax = Adjustment.adjust(future_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Train the model to find the correction factors for the minimum temperature (tasmin) data\n",
+    "# Train the model to find the correction factors for the minimum temperature (tasmin) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_tmin,\n",
     "    hist=historical_tasmin,\n",
@@ -561,7 +572,8 @@
    },
    "outputs": [],
    "source": [
-    "# Convert the reference corrected data into netCDF file. We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
+    "# Convert the reference corrected data into netCDF file.\n",
+    "# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
     "ref_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_ref_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_ref_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_ref_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
     "\n",
-    "# Write to temporary folder\n",
+    "# Write to temporary folder.\n",
     "fn_ref = tmp / \"reference_dataset.nc\"\n",
     "ref_dataset.to_netcdf(fn_ref)\n",
     "\n",
-    "# Convert the future corrected data into netCDF file\n",
+    "# Convert the future corrected data into netCDF file.\n",
     "fut_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_fut_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_fut_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_fut_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
@@ -610,13 +622,6 @@
     "# Compare it to the future precipitation without bias-correction.\n",
     "future_pr.plot()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
 "metadata": {
diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb
index 41b2d4c5..4a7e12a8 100644
--- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb
+++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb
@@ -49,41 +49,37 @@
     "\n",
     "import datetime as dt\n",
     "\n",
-    "# Basic system packages\n",
+    "# Basic system packages:\n",
     "import os\n",
     "import tempfile\n",
     "import warnings\n",
     "from pathlib import Path\n",
     "\n",
-    "# Packages for data extraction on remote servers/filesystems\n",
-    "import fsspec\n",
+    "# Packages for data extraction on remote servers/filesystems:\n",
     "import gcsfs\n",
     "import geopandas as gpd\n",
     "\n",
-    "# Packages for geographic processing\n",
+    "# Packages for geographic processing:\n",
     "import intake\n",
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
-    "import rasterio\n",
-    "import rioxarray as rio\n",
-    "import s3fs\n",
     "\n",
     "# Packages related to ravenpy and hydrological modelling:\n",
     "import spotpy\n",
     "import xarray as xr\n",
     "\n",
-    "# Packages required for data processing\n",
+    "# Packages required for data processing:\n",
     "import xclim\n",
     "import xclim.sdba as sdba\n",
     "from birdy import WPSClient\n",
     "from clisops.core import average, subset\n",
     "from dask.diagnostics import ProgressBar\n",
+    "from ravenpy.utilities.calibration import SpotSetup\n",
+    "from ravenpy.utilities.testdata import get_file\n",
     "\n",
     "from ravenpy import Emulator\n",
     "from ravenpy.config import commands as rc\n",
-    "from ravenpy.config.emulators import GR4JCN\n",
-    "from ravenpy.utilities.calibration import SpotSetup\n",
-    "from ravenpy.utilities.testdata import get_file"
+    "from ravenpy.config.emulators import GR4JCN"
    ]
   },
@@ -109,10 +105,10 @@
     "    \"WPS_URL\", \"https://pavics.ouranos.ca/twitcher/ows/proxy/raven/wps\"\n",
     ")\n",
     "\n",
-    "# Connect to the PAVICS-Hydro Raven WPS server to get the geospatial data from GeoServer\n",
+    "# Connect to the PAVICS-Hydro Raven WPS server to get the geospatial data from GeoServer.\n",
     "wps = WPSClient(url)\n",
     "\n",
-    "# Make a temporary path where the data will be stored and used by Raven\n",
+    "# Make a temporary path where the data will be stored and used by Raven.\n",
     "tmp = Path(tempfile.mkdtemp())"
    ]
   },
@@ -132,14 +128,14 @@
    },
    "outputs": [],
    "source": [
-    "# Name of the watershed boundaries file that is uploaded to the server. Note that this file contains the\n",
-    "# .shx, .shp and other associated files for shapefiles, all zipped into one file. It will also be used later for\n",
-    "# extracting meteorological data.\n",
+    "# Name of the watershed boundaries file that is uploaded to the server.\n",
+    "# Note that this file contains the .shx, .shp and other associated files for shapefiles, all zipped into one file.\n",
+    "# It will also be used later for extracting meteorological data.\n",
     "basin_contour = get_file(\"paper/shapefile_basin_574_HYSETS.zip\")\n",
     "\n",
-    "# This file is an extraction of streamflow for catchment 574 in HYSETS. Weather data will be gathered later from\n",
-    "# the ERA5 database, but could also be taken directly from HYSETS. This is to show how the process could be linked\n",
-    "# together for your own applications using ERA5 data.\n",
+    "# This file is an extraction of streamflow for catchment 574 in HYSETS.\n",
+    "# Weather data will be gathered later from the ERA5 database, but could also be taken directly from HYSETS.\n",
+    "# This is to show how the process could be linked together for your own applications using ERA5 data.\n",
     "streamflow_file = get_file(\"paper/Qobs_574_HYSETS.nc\")"
    ]
   },
@@ -162,13 +158,13 @@
     "# Reference period that will be used for ERA5 and climate model data for the reference period.\n",
     "# Here let's focus on a 10-year period to keep running times lower.\n",
     "reference_start_day = dt.datetime(1980, 12, 31)\n",
-    "reference_end_day = dt.datetime(1991, 1, 1)\n",
-    "# Notice we are using one day before and one day after the desired period of 1981-01-01 to 1990-12-31.\n",
+    "reference_end_day = dt.datetime(1990, 12, 31)\n",
+    "# Notice we are using one day before the desired period of 1981-01-01 to 1990-12-31.\n",
     "# This is to account for any UTC shifts that might require getting data in a previous or later time.\n",
     "\n",
-    "# Same process for the future period, 100 years later\n",
+    "# Same process for the future period, 100 years later.\n",
     "future_start_day = dt.datetime(2080, 12, 31)\n",
-    "future_end_day = dt.datetime(2091, 1, 1)"
+    "future_end_day = dt.datetime(2090, 12, 31)"
    ]
   },
@@ -369,10 +365,13 @@
     "cat = intake.open_catalog(catalog_name)\n",
     "ds = cat.era5_reanalysis_single_levels.to_dask()\n",
     "\n",
+    "# Set the date to the midnight of the given day.\n",
+    "ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
     "\"\"\"\n",
     "Get the ERA5 data. \n",
     "We will rechunk it to a single chunk to make it compatible with other codes on the platform especially bias-correction.\n",
     "We are also taking the daily min and max temperatures as well as the daily total precipitation.\n",
     "\"\"\"\n",
     "# We will add a wrapper to ensure that the following operations will preserve the original data attributes,\n",
     "# such as units and variable names.\n",
     "with xr.set_options(keep_attrs=True):\n",
     "    ERA5_reference = subset.subset_shape(\n",
     "        ds.sel(time=slice(reference_start_day, reference_end_day)), basin_contour\n",
     "    )\n",
-    "    ERA5_tmin = ERA5_reference[\"t2m\"].resample(time=\"1D\").min().chunk(-1, -1, -1)\n",
-    "    ERA5_tmax = ERA5_reference[\"t2m\"].resample(time=\"1D\").max().chunk(-1, -1, -1)\n",
-    "    ERA5_pr = ERA5_reference[\"tp\"].resample(time=\"1D\").sum().chunk(-1, -1, -1)\n",
+    "    ERA5_tmin = ERA5_reference.t2m.resample(time=\"1D\").min().chunk(time=-1)\n",
+    "    ERA5_tmax = ERA5_reference.t2m.resample(time=\"1D\").max().chunk(time=-1)\n",
+    "    ERA5_pr = ERA5_reference.tp.resample(time=\"1D\").sum().chunk(time=-1)\n",
     "\n",
     "    # Change the units\n",
     "    ERA5_tmin = ERA5_tmin - 273.15  # K to °C\n",
@@ -407,8 +406,9 @@
     "    ERA5_tmax = ERA5_tmax.to_dataset(name=\"tmax\", promote_attrs=True)\n",
     "    ERA5_pr = ERA5_pr.to_dataset(name=\"pr\", promote_attrs=True)\n",
     "\n",
-    "    # Write to disk. Here is where we write to disk and where the notebook will fail if running it from the\n",
-    "    # original location on the server (which is read-only). Please move the notebooks to your writable-workspace.\n",
+    "    # Write to disk.\n",
+    "    # Here is where we write to disk and where the notebook will fail if running it from the original location on the server (which is read-only).\n",
+    "    # Please move the notebooks to your writable-workspace.\n",
     "    ERA5_weather = xr.merge([ERA5_tmin, ERA5_tmax, ERA5_pr])\n",
     "    ERA5_weather.to_netcdf(tmp / \"ERA5_meteo_data.nc\")"
    ]
   },
@@ -430,10 +430,10 @@
    },
    "outputs": [],
    "source": [
-    "# Climate model to use\n",
+    "# Climate model to use.\n",
     "climate_model = \"MIROC6\"\n",
     "\n",
-    "# Get the catalog info from the pangeo dataset, which basically is a list of links to the various products.\n",
+    "# Get the catalog info from the PANGEO dataset, which basically is a list of links to the various products.\n",
     "fsCMIP = gcsfs.GCSFileSystem(token=\"anon\", access=\"read_only\")\n",
     "col = intake.open_esm_datastore(\n",
     "    \"https://storage.googleapis.com/cmip6/pangeo-cmip6.json\"\n",
     ")\n",
@@ -471,6 +471,9 @@
     "        if \"height\" in ds.coords:\n",
     "            ds = ds.drop_vars(\"height\")\n",
     "\n",
+    "        # Set the date to the midnight of the given day.\n",
+    "        ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
     "        out[exp][variable] = average.average_shape(\n",
     "            ds,\n",
     "            basin_contour,\n",
@@ -505,8 +508,8 @@
    },
    "outputs": [],
    "source": [
-    "# Here we need to make sure that our units are all in the correct format. You can play around with the tools we've seen thus far to explore the units\n",
-    "# and make sure everything is consistent.\n",
+    "# Here we need to make sure that our units are all in the correct format.\n",
+    "# You can play around with the tools we've seen thus far to explore the units and make sure everything is consistent.\n",
     "\n",
     "# Let's start with precipitation:\n",
     "# The CMIP data is a rate rather than an absolute value, so let's get the absolute values:\n",
@@ -541,10 +544,11 @@
    },
    "outputs": [],
    "source": [
-    "# Use xclim utilities (sbda) to give information on the type of window used for the bias correction.\n",
+    "# Use xclim utilities (SDBA) to give information on the type of window used for the bias correction.\n",
     "group_month_window = sdba.utils.Grouper(\"time.dayofyear\", window=15)\n",
     "\n",
-    "# This is an adjusting function. It builds the tool that will perform the corrections.\n",
+    "# This is an adjusting function.\n",
+    "# It builds the tool that will perform the corrections.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_weather.pr,\n",
     "    hist=historical_pr,\n",
@@ -553,17 +557,17 @@
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_precip = Adjustment.adjust(historical_pr, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_precip = Adjustment.adjust(future_pr, interp=\"linear\")\n",
     "\n",
-    "# Ensure that the precipitation is non-negative, which can happen with some climate models\n",
+    "# Ensure that the precipitation is non-negative, which can happen with some climate models.\n",
     "corrected_ref_precip = corrected_ref_precip.where(corrected_ref_precip > 0, 0)\n",
     "corrected_fut_precip = corrected_fut_precip.where(corrected_fut_precip > 0, 0)\n",
     "\n",
-    "# Train the model to find the correction factors for the maximum temperature (tasmax) data\n",
+    "# Train the model to find the correction factors for the maximum temperature (tasmax) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
@@ -572,13 +576,13 @@
     "    ref=ERA5_weather.tmax,\n",
     "    hist=historical_tasmax,\n",
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_tasmax = Adjustment.adjust(historical_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_tasmax = Adjustment.adjust(future_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Train the model to find the correction factors for the minimum temperature (tasmin) data\n",
+    "# Train the model to find the correction factors for the minimum temperature (tasmin) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_weather.tmin,\n",
     "    hist=historical_tasmin,\n",
@@ -587,10 +591,10 @@
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_tasmin = Adjustment.adjust(historical_tasmin, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_tasmin = Adjustment.adjust(future_tasmin, interp=\"linear\")"
    ]
   },
@@ -611,7 +615,8 @@
    },
    "outputs": [],
    "source": [
-    "# Convert the reference corrected data into netCDF file. We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
+    "# Convert the reference corrected data into netCDF file.\n",
+    "# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
     "ref_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_ref_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_ref_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_ref_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
     "\n",
-    "# Write to temporary folder\n",
+    "# Write to temporary folder.\n",
     "fn_tmp_ref = tmp / \"reference_dataset_tmp.nc\"\n",
     "ref_dataset.to_netcdf(fn_tmp_ref)\n",
     "\n",
-    "# Convert the future corrected data into netCDF file\n",
+    "# Convert the future corrected data into netCDF file.\n",
     "fut_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_fut_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_fut_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_fut_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
-    "# Write to temporary folder\n",
+    "# Write to temporary folder.\n",
     "with ProgressBar():\n",
     "    fn_tmp_fut = tmp / \"future_dataset_tmp.nc\"\n",
     "    fut_dataset.to_netcdf(fn_tmp_fut)\n",
     "\n",
@@ -664,19 +669,18 @@
    },
    "outputs": [],
    "source": [
-    "# Define the hydrological response unit. We can use the geographic information we gathered previously to\n",
-    "# populate the fields for the HRU.\n",
-    "hru = {}\n",
-    "hru = dict(\n",
-    "    area=all_properties[\"area\"],\n",
-    "    elevation=all_properties[\"elevation\"],\n",
-    "    latitude=all_properties[\"latitude\"],\n",
-    "    longitude=all_properties[\"longitude\"],\n",
-    "    hru_type=\"land\",\n",
-    ")\n",
+    "# Define the hydrological response unit.\n",
+    "# We can use the geographic information we gathered previously to populate the fields for the HRU.\n",
+    "hru = {\n",
+    "    \"area\": all_properties[\"area\"],\n",
+    "    \"elevation\": all_properties[\"elevation\"],\n",
+    "    \"latitude\": all_properties[\"latitude\"],\n",
+    "    \"longitude\": all_properties[\"longitude\"],\n",
+    "    \"hru_type\": \"land\",\n",
+    "}\n",
     "\n",
-    "# Establish the start date for the calibration. This is set in the model configuration, so the calibrator\n",
-    "# will simply execute the model which has been pre-configured to run on this period.\n",
+    "# Establish the start date for the calibration.\n",
+    "# This is set in the model configuration, so the calibrator will simply execute the model which has been pre-configured to run on this period.\n",
     "start_date = dt.datetime(1981, 1, 1)\n",
     "end_date = dt.datetime(1985, 12, 31)\n",
     "\n",
@@ -690,8 +694,8 @@
     "    \"PRECIP\": \"pr\",\n",
     "}\n",
     "\n",
-    "# The data keywords necessary to indicate the elevation, latitude and longitude of the ERA5 forcing data. Here\n",
-    "# we use the information for the basin average as the ERA5 data is averaged on the watershed.\n",
+    "# The data keywords necessary to indicate the elevation, latitude and longitude of the ERA5 forcing data.\n",
+    "# Here we use the information for the basin average as the ERA5 data is averaged on the watershed.\n",
     "data_kwds = {\n",
     "    \"ALL\": {\n",
     "        \"elevation\": hru[\"elevation\"],\n",
     "        \"latitude\": hru[\"latitude\"],\n",
     "        \"longitude\": hru[\"longitude\"],\n",
     "    }\n",
     "}\n",
     "\n",
-    "# Give a name to the simulation\n",
+    "# Give a name to the simulation.\n",
     "run_name = \"Paper_example_simulation\"\n",
     "\n",
-    "# Setup the gauge object that includes meteorological data from ERA5\n",
+    "# Set up the gauge object that includes meteorological data from ERA5.\n",
     "gauge = [\n",
     "    rc.Gauge.from_nc(\n",
     "        tmp\n",
     "        / \"ERA5_meteo_data.nc\",  # Path to the ERA5 file containing all three variables\n",
     "        data_type=data_type,\n",
     "        alt_names=alt_names,\n",
     "        data_kwds=data_kwds,\n",
     "    )\n",
     "]\n",
     "\n",
     "# Read the streamflow from the HYSETS catchment data for this basin\n",
     "discharge_data = [rc.ObservationData.from_nc(streamflow_file, alt_names=\"discharge\")]\n",
     "\n",
-    "# Which evaluation metric do we want to use for calibration. Raven will return this by default after each run,\n",
-    "# and the optimizer will read it directly to calibrate.\n",
+    "# Which evaluation metric do we want to use for calibration.\n",
+    "# Raven will return this by default after each run, and the optimizer will read it directly to calibrate.\n",
     "eval_metrics = (\"NASH_SUTCLIFFE\",)\n",
     "\n",
     "# Build the model configuration according to user preferences and inputs\n",
@@ -751,12 +755,14 @@
    },
    "outputs": [],
    "source": [
-    "# In order to calibrate your model, you need to give the lower and higher bounds of the model. In this case,\n",
-    "# we are passing the boundaries for a GR4JCN, but it's important to change them, if you are using another model.\n",
+    "# In order to calibrate your model, you need to give the lower and higher bounds of the model.\n",
+    "# In this case, we are passing the boundaries for a GR4JCN, but it's important to change them, if you are using another model.\n",
     "low = (0.01, -15.0, 10.0, 0.0, 1.0, 0.0)\n",
     "high = (2.5, 10.0, 700.0, 7.0, 30.0, 1.0)\n",
     "\n",
-    "# Random seed. We will provide one for consistency purposes, but operationnaly this should not be provided.\n",
+    "# Random seed.\n",
+    "# We will provide one for consistency purposes, but operationally this should not be provided.\n",
+    "# FIXME: This will change in numpy v2.0, so we will need to update this code then.\n",
     "random_seed = 42\n",
     "np.random.seed(random_seed)\n",
     "\n",
     "spot_setup = SpotSetup(\n",
     "    config=model_config,\n",
     "    low=low,\n",
     "    high=high,\n",
     ")\n",
     "\n",
-    "# Maximum number of model evaluations. We only use 200 here to keep the computation time as low as possible,\n",
-    "# but you will want to increase this for operational use, perhaps to 2000-5000 depending on the model.\n",
+    "# Maximum number of model evaluations.\n",
+    "# We only use 200 here to keep the computation time as low as possible, but you will want to increase this for operational use, perhaps to 2000-5000 depending on the model.\n",
     "max_iterations = 200\n",
     "\n",
-    "# Setup the spotpy sampler with the method, the setup configuration, a run name and other options. Please refer\n",
-    "# to the spotpy documentation for more options. We recommend sticking to this format for efficiency of most\n",
-    "# applications. Here we use DDS as the optimization algorithm. More are available: see the Spotpy documentation\n",
-    "# for more information. Here, DDS is used as it is powerful and particularly useful for optimizations with small\n",
-    "# evaluation budgets. For more details on DDS, see:\n",
+    "# Set up the spotpy sampler with the method, the setup configuration, a run name and other options.\n",
+    "# Please refer to the spotpy documentation for more options.\n",
+    "# We recommend sticking to this format for efficiency of most applications.\n",
+    "# Here we use DDS as the optimization algorithm. More are available: see the Spotpy documentation for more information.\n",
+    "# Here, DDS is used as it is powerful and particularly useful for optimizations with small evaluation budgets.\n",
+    "# For more details on DDS, see:\n",
     "#\n",
-    "# Tolson, B.A. and Shoemaker, C.A., 2007. Dynamically dimensioned search algorithm for computationally efficient watershed model calibration. Water\n",
-    "# Resources Research, 43(1)\n",
+    "# Tolson, B.A. and Shoemaker, C.A., 2007. Dynamically dimensioned search algorithm for computationally efficient watershed model calibration. Water Resources Research, 43(1)\n",
     "sampler = spotpy.algorithms.dds(\n",
     "    spot_setup, dbname=\"RAVEN_model_run\", dbformat=\"ram\", save_sim=False\n",
     ")\n",
     "\n",
-    "# Launch the actual optimization. Multiple trials can be launched, where the entire process is repeated and\n",
-    "# the best overall value from all trials is returned.\n",
+    "# Launch the actual optimization.\n",
+    "# Multiple trials can be launched, where the entire process is repeated and the best overall value from all trials is returned.\n",
     "sampler.sample(max_iterations, trials=1)\n",
     "\n",
-    "# Get the model diagnostics\n",
+    "# Get the model diagnostics.\n",
     "diag = spot_setup.diagnostics\n",
     "\n",
-    "# Get all the values of each iteration\n",
+    "# Get all the values of each iteration.\n",
     "results = sampler.getdata()\n",
     "\n",
-    "# Get the raw resutlts directly in an array\n",
+    "# Get the raw results directly in an array.\n",
     "bestindex, bestobjfun = spotpy.analyser.get_maxlikeindex(\n",
     "    results\n",
     ")  # Want to get the MAX NSE (change for min for RMSE).\n",
     "best_model_run = list(\n",
     "    results[bestindex][0]\n",
     ")  # Get the parameter set returning the best NSE.\n",
     "optimized_parameters = best_model_run[\n",
     "    1:-1\n",
     "]  # Remove the NSE value (position 0) and the ID at the last position to get the actual parameter set.\n",
@@ -834,10 +840,10 @@
     "\n",
     "sim_output = Emulator(config=model_validation).run()\n",
     "\n",
-    "# Get validation NSE (note we are counting the first year without warm-up)\n",
+    "# Get validation NSE (note we are counting the first year without warm-up).\n",
     "NSE = sim_output.diagnostics[\"DIAG_NASH_SUTCLIFFE\"]\n",
     "\n",
-    "# Plot the model output\n",
+    "# Plot the model output.\n",
     "sim_output.hydrograph.q_sim.plot(color=\"blue\", label=\"Simulation\")\n",
     "sim_output.hydrograph.q_obs.plot(color=\"black\", label=\"Observation\")\n",
     "plt.legend()\n",
@@ -866,11 +872,11 @@
    },
    "outputs": [],
    "source": [
-    "# Setup a gauge for Raven to read-in the reference climate data, just like for ERA5\n",
+    "# Set up a gauge for Raven to read-in the reference climate data, just like for ERA5.\n",
     "gauge_ref = [\n",
     "    rc.Gauge.from_nc(\n",
     "        tmp\n",
-    "        / \"reference_dataset.nc\",  # Path to the CMIP6 model reference data netcdf file\n",
+    "        / \"reference_dataset.nc\",  # Path to the CMIP6 model reference data netcdf file.\n",
     "        data_type=data_type,\n",
     "        alt_names=alt_names,\n",
     "        data_kwds=data_kwds,\n",
@@ -881,15 +887,15 @@
     "model_config_reference = model_validation.duplicate(\n",
     "    Gauge=gauge_ref,\n",
     "    StartDate=reference_start_day\n",
     "    + dt.timedelta(days=1),  # Add a day here to account for the UTC lag in ERA5.\n",
     "    EndDate=reference_end_day,\n",
     ")\n",
     "\n",
     "# Run the model from the configuration and get the outputs.\n",
     "ref_output = Emulator(config=model_config_reference).run()\n",
     "\n",
-    "# Plot the model output. Note that both simulations should have similar hydrological\n",
-    "# regime but day-to-day variability is not expected to match.\n",
+    "# Plot the model output.\n",
+    "# Note that both simulations should have similar hydrological regime but day-to-day variability is not expected to match.\n",
     "ref_output.hydrograph.q_sim.plot(color=\"blue\", label=\"Reference period simulation\")\n",
     "ref_output.hydrograph.q_obs.plot(color=\"black\", label=\"Observation\")\n",
     "plt.legend()\n",
@@ -914,7 +920,7 @@
    },
    "outputs": [],
    "source": [
-    "# Setup a gauge for Raven to read-in the future climate data, just like for the reference data\n",
+    "# Set up a gauge for Raven to read-in the future climate data, just like for the reference data.\n",
     "gauge_fut = [\n",
     "    rc.Gauge.from_nc(\n",
     "        tmp / \"future_dataset.nc\",  # Path to the CMIP6 model reference data netcdf file\n",
     "        data_type=data_type,\n",
     "        alt_names=alt_names,\n",
     "        data_kwds=data_kwds,\n",
     "    )\n",
     "]\n",
     "\n",
     "model_config_future = model_config_reference.duplicate(\n",
     "    Gauge=gauge_fut,\n",
     "    StartDate=future_start_day,\n",
     "    EndDate=future_end_day,\n",
     ")\n",
     "\n",
@@ -935,7 +941,7 @@
     "# Run the model and get the outputs and hydrographs.\n",
     "fut_output = Emulator(config=model_config_future).run()\n",
     "\n",
-    "# Plot the model output\n",
+    "# Plot the model output.\n",
     "fut_output.hydrograph.q_sim.plot(color=\"blue\", label=\"Future simulation\")\n",
     "plt.legend()\n",
     "plt.title(\"Future period\")\n",
@@ -972,7 +978,7 @@
     "reference_flows = ref_output.hydrograph.q_sim.groupby(\"time.dayofyear\").mean()\n",
     "future_flows = fut_output.hydrograph.q_sim.groupby(\"time.dayofyear\").mean()\n",
     "\n",
-    "# Plot the model output\n",
+    "# Plot the model output.\n",
     "observed_flows.plot(color=\"black\", label=\"Observation\", x=\"dayofyear\")\n",
     "simulated_flows.plot(color=\"green\", label=\"Simulation\", x=\"dayofyear\")\n",
     "reference_flows.plot(color=\"blue\", label=\"Reference\", x=\"dayofyear\")\n",