From 9dc9fe769d9a94d908ff97fcf55960d39011502b Mon Sep 17 00:00:00 2001
From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com>
Date: Fri, 20 Sep 2024 17:24:54 -0400
Subject: [PATCH] update notebooks to work with changes to dask chunking

---
 ...tting_and_bias_correcting_CMIP6_data.ipynb |  99 +++++----
 ...e_change_impact_study_on_a_watershed.ipynb | 200 +++++++++---------
 2 files changed, 155 insertions(+), 144 deletions(-)

diff --git a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
index cf02d991..9adf89c6 100644
--- a/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
+++ b/docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
@@ -53,7 +53,6 @@
     "import xclim\n",
     "import xclim.sdba as sdba\n",
     "from clisops.core import average, subset\n",
-    "\n",
     "from ravenpy.utilities.testdata import get_file\n",
     "\n",
     "tmp = Path(tempfile.mkdtemp())"
@@ -85,10 +84,10 @@
    },
    "outputs": [],
    "source": [
-    "# We get the basin contour for testing on a server. You can replace the getfile method by a string containing the path\n",
-    "# to your own geojson\n",
+    "# We get the basin contour for testing on a server.\n",
+    "# You can replace the getfile method by a string containing the path to your own geojson.\n",
     "\n",
-    "# Get basin contour\n",
+    "# Get basin contour.\n",
     "basin_contour = get_file(\"notebook_inputs/input.geojson\")\n",
     "\n",
     "reference_start_day = dt.datetime(1980, 12, 31)\n",
@@ -158,15 +157,16 @@
    },
    "outputs": [],
    "source": [
-    "# Prepare the filesystem that allows reading data. Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
+    "# Prepare the filesystem that allows reading data.\n",
+    "# Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
     "fsCMIP = gcsfs.GCSFileSystem(token=\"anon\", access=\"read_only\")\n",
     "\n",
-    "# Get the catalog info from the pangeo dataset, which basically is a list of links to the various products.\n",
+    "# Get the catalog info from the PANGEO dataset, which basically is a list of links to the various products.\n",
     "col = intake.open_esm_datastore(\n",
     "    \"https://storage.googleapis.com/cmip6/pangeo-cmip6.json\"\n",
     ")\n",
     "\n",
-    "# Print the contents of the catalog, so we can see the classification system\n",
+    "# Print the contents of the catalog, so we can see the classification system.\n",
     "display(col)"
    ]
   },
@@ -185,7 +185,8 @@
    },
    "outputs": [],
    "source": [
-    "# Get the list of models. Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
+    "# Get the list of models.\n",
+    "# Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
     "list(col.df.source_id.unique())"
    ]
   },
@@ -225,11 +226,10 @@
     "    member_id=\"r1i1p1f1\",\n",
     "    source_id=climate_model,\n",
     ")\n",
-    "col_subset = col.search(\n",
-    "    require_all_on=[\"source_id\"], **query\n",
-    ")  # Command that will return the filtered list\n",
+    "# Return the filtered list.\n",
+    "col_subset = col.search(require_all_on=[\"source_id\"], **query)\n",
     "\n",
-    "# Show the filtered list:\n",
+    "# Show the filtered list.\n",
     "display(col_subset.df)"
    ]
   },
@@ -249,7 +249,7 @@
    },
    "outputs": [],
    "source": [
-    "# Get the object locator object\n",
+    "# Get the object locator object.\n",
     "mapper = fsCMIP.get_mapper(col_subset.df.zstore[0])"
    ]
   },
@@ -277,17 +277,20 @@
    },
    "outputs": [],
    "source": [
-    "# Get the CMIP6 data from Google Cloud and read it in memory using xarray. This is done via \"lazy loading\" and is not actually reading the data in memory\n",
-    "# yet, but is keeping track of what it will need to get, eventually.\n",
+    "# Get the CMIP6 data from Google Cloud and read it in memory using xarray.\n",
+    "# This is done via \"lazy loading\" and is not actually reading the data in memory yet, but is keeping track of what it will need to get, eventually.\n",
     "ds = xr.open_zarr(mapper, consolidated=True)\n",
     "\n",
-    "# Convert to numpy.datetime64 object to be compatbile\n",
-    "if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
-    "    ds = ds.convert_calendar(\"standard\")\n",
+    "# Convert to numpy.datetime64 object for compatibility.\n",
+    "ds = ds.convert_calendar(\"standard\")\n",
     "\n",
-    "# Extract only the dates that we really want. Again, this is done via lazy loading, and is not actually using memory at this point.\n",
+    "# Extract only the dates that we really want.\n",
+    "# Again, this is done via lazy loading, and is not actually using memory at this point.\n",
     "ds = ds.sel(time=slice(reference_start_day, reference_end_day))\n",
     "\n",
+    "# Set the date to the midnight of the given day.\n",
+    "ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
     "# Use the clisops subsetting tools to extract the data for the watershed boundaries and take the spatial average\n",
     "ds = average.average_shape(ds, basin_contour)\n",
     "\n",
@@ -322,14 +325,16 @@
     "    with xr.set_options(keep_attrs=True):\n",
     "        ds = xr.open_zarr(mapper, consolidated=True)\n",
     "\n",
-    "        # Convert to numpy.datetime64 object to be compatbile\n",
-    "        if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
-    "            ds = ds.convert_calendar(\"standard\")\n",
+    "        # Convert to numpy.datetime64 object for compatibility.\n",
+    "        ds = ds.convert_calendar(\"standard\")\n",
     "\n",
-    "        # Compute average over region\n",
+    "        # Set the date to the midnight of the given day.\n",
+    "        ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
+    "        # Compute the average over region.\n",
     "        out = average.average_shape(ds.sel(time=slice(start, end)), geometry)\n",
     "\n",
-    "        # Convert geometry variables into attributes\n",
+    "        # Convert geometry variables into attributes.\n",
     "        attrs = {\n",
     "            key: out[key].values.item()\n",
     "            for key in out.coords\n",
@@ -431,9 +436,15 @@
     "    ERA5_reference = subset.subset_shape(\n",
     "        ds.sel(time=slice(reference_start_day, reference_end_day)), basin_contour\n",
     "    ).mean({\"latitude\", \"longitude\"})\n",
-    "    ERA5_tmin = ERA5_reference[\"t2m\"].resample(time=\"1D\").min().chunk(-1, -1, -1)\n",
-    "    ERA5_tmax = ERA5_reference[\"t2m\"].resample(time=\"1D\").max().chunk(-1, -1, -1)\n",
-    "    ERA5_pr = ERA5_reference[\"tp\"].resample(time=\"1D\").sum().chunk(-1, -1, -1)"
+    "    ERA5_tmin = (\n",
+    "        ERA5_reference.t2m.resample(time=\"1D\")\n",
+    "        .min()\n",
+    "        .chunk(\n",
+    "            time=-1,\n",
+    "        )\n",
+    "    )\n",
+    "    ERA5_tmax = ERA5_reference.t2m.resample(time=\"1D\").max().chunk(time=-1)\n",
+    "    ERA5_pr = ERA5_reference.tp.resample(time=\"1D\").sum().chunk(time=-1)"
    ]
   },
@@ -455,8 +466,8 @@
    },
    "outputs": [],
    "source": [
-    "# Here we need to make sure that our units are all in the correct format. You can play around with the tools we've seen thus far to explore the units\n",
-    "# and make sure everything is consistent.\n",
+    "# Here we need to make sure that our units are all in the correct format.\n",
+    "# You can play around with the tools we've seen thus far to explore the units and make sure everything is consistent.\n",
     "\n",
     "# Let's start with precipitation:\n",
     "ERA5_pr = xclim.core.units.convert_units_to(ERA5_pr, \"mm\", context=\"hydro\")\n",
@@ -497,7 +508,7 @@
    },
    "outputs": [],
    "source": [
-    "# Use xclim utilities (sbda) to give information on the type of window used for the bias correction.\n",
+    "# Use xclim utilities (SDBA) to give information on the type of window used for the bias correction.\n",
     "group_month_window = sdba.utils.Grouper(\"time.dayofyear\", window=15)\n",
     "\n",
     "# This is an adjusting function. It builds the tool that will perform the corrections.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_pr, hist=historical_pr, nquantiles=50, kind=\"+\", group=group_month_window\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_precip = Adjustment.adjust(historical_pr, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_precip = Adjustment.adjust(future_pr, interp=\"linear\")\n",
     "\n",
-    "# Ensure that the precipitation is non-negative, which can happen with some climate models\n",
+    "# Ensure that the precipitation is non-negative, which can happen with some climate models.\n",
     "corrected_ref_precip = corrected_ref_precip.where(corrected_ref_precip > 0, 0)\n",
     "corrected_fut_precip = corrected_fut_precip.where(corrected_fut_precip > 0, 0)\n",
     "\n",
-    "# Train the model to find the correction factors for the maximum temperature (tasmax) data\n",
+    "# Train the model to find the correction factors for the maximum temperature (tasmax) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_tmax,\n",
     "    hist=historical_tasmax,\n",
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
@@ -524,13 +535,13 @@
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_tasmax = Adjustment.adjust(historical_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_tasmax = Adjustment.adjust(future_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Train the model to find the correction factors for the minimum temperature (tasmin) data\n",
+    "# Train the model to find the correction factors for the minimum temperature (tasmin) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_tmin,\n",
     "    hist=historical_tasmin,\n",
@@ -561,7 +572,8 @@
    },
    "outputs": [],
    "source": [
-    "# Convert the reference corrected data into netCDF file. We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
+    "# Convert the reference corrected data into netCDF file.\n",
+    "# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
     "ref_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_ref_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_ref_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_ref_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
     "\n",
-    "# Write to temporary folder\n",
+    "# Write to temporary folder.\n",
     "fn_ref = tmp / \"reference_dataset.nc\"\n",
     "ref_dataset.to_netcdf(fn_ref)\n",
     "\n",
-    "# Convert the future corrected data into netCDF file\n",
+    "# Convert the future corrected data into netCDF file.\n",
     "fut_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_fut_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_fut_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_fut_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
@@ -610,13 +622,6 @@
     "# Compare it to the future precipitation without bias-correction.\n",
     "future_pr.plot()"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
 "metadata": {
diff --git a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb
index 41b2d4c5..4a7e12a8 100644
--- a/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb
+++ b/docs/notebooks/paper/Perform_a_climate_change_impact_study_on_a_watershed.ipynb
@@ -49,41 +49,37 @@
     "\n",
     "import datetime as dt\n",
     "\n",
-    "# Basic system packages\n",
+    "# Basic system packages:\n",
     "import os\n",
     "import tempfile\n",
     "import warnings\n",
     "from pathlib import Path\n",
     "\n",
-    "# Packages for data extraction on remote servers/filesystems\n",
-    "import fsspec\n",
+    "# Packages for data extraction on remote servers/filesystems:\n",
     "import gcsfs\n",
     "import geopandas as gpd\n",
     "\n",
-    "# Packages for geographic processing\n",
+    "# Packages for geographic processing:\n",
     "import intake\n",
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
-    "import rasterio\n",
-    "import rioxarray as rio\n",
-    "import s3fs\n",
     "\n",
     "# Packages related to ravenpy and hydrological modelling:\n",
     "import spotpy\n",
     "import xarray as xr\n",
     "\n",
-    "# Packages required for data processing\n",
+    "# Packages required for data processing:\n",
     "import xclim\n",
     "import xclim.sdba as sdba\n",
     "from birdy import WPSClient\n",
     "from clisops.core import average, subset\n",
     "from dask.diagnostics import ProgressBar\n",
+    "from ravenpy.utilities.calibration import SpotSetup\n",
+    "from ravenpy.utilities.testdata import get_file\n",
     "\n",
     "from ravenpy import Emulator\n",
     "from ravenpy.config import commands as rc\n",
-    "from ravenpy.config.emulators import GR4JCN\n",
-    "from ravenpy.utilities.calibration import SpotSetup\n",
-    "from ravenpy.utilities.testdata import get_file"
+    "from ravenpy.config.emulators import GR4JCN"
    ]
   },
@@ -109,10 +105,10 @@
     "    \"WPS_URL\", \"https://pavics.ouranos.ca/twitcher/ows/proxy/raven/wps\"\n",
     ")\n",
     "\n",
-    "# Connect to the PAVICS-Hydro Raven WPS server to get the geospatial data from GeoServer\n",
+    "# Connect to the PAVICS-Hydro Raven WPS server to get the geospatial data from GeoServer.\n",
     "wps = WPSClient(url)\n",
     "\n",
-    "# Make a temporary path where the data will be stored and used by Raven\n",
+    "# Make a temporary path where the data will be stored and used by Raven.\n",
     "tmp = Path(tempfile.mkdtemp())"
    ]
   },
@@ -132,14 +128,14 @@
    },
    "outputs": [],
    "source": [
-    "# Name of the watershed boundaries file that is uploaded to the server. Note that this file contains the\n",
-    "# .shx, .shp and other associated files for shapefiles, all zipped into one file. It will also be used later for\n",
-    "# extracting meteorological data.\n",
+    "# Name of the watershed boundaries file that is uploaded to the server.\n",
+    "# Note that this file contains the .shx, .shp and other associated files for shapefiles, all zipped into one file.\n",
+    "# It will also be used later for extracting meteorological data.\n",
     "basin_contour = get_file(\"paper/shapefile_basin_574_HYSETS.zip\")\n",
     "\n",
-    "# This file is an extraction of streamflow for catchment 574 in HYSETS. Weather data will be gathered later from\n",
-    "# the ERA5 database, but could also be taken directly from HYSETS. This is to show how the process could be linked\n",
-    "# together for your own applications using ERA5 data.\n",
+    "# This file is an extraction of streamflow for catchment 574 in HYSETS.\n",
+    "# Weather data will be gathered later from the ERA5 database, but could also be taken directly from HYSETS.\n",
+    "# This is to show how the process could be linked together for your own applications using ERA5 data.\n",
     "streamflow_file = get_file(\"paper/Qobs_574_HYSETS.nc\")"
    ]
   },
@@ -162,13 +158,13 @@
     "# Reference period that will be used for ERA5 and climate model data for the reference period.\n",
     "# Here let's focus on a 10-year period to keep running times lower.\n",
     "reference_start_day = dt.datetime(1980, 12, 31)\n",
-    "reference_end_day = dt.datetime(1991, 1, 1)\n",
-    "# Notice we are using one day before and one day after the desired period of 1981-01-01 to 1990-12-31.\n",
+    "reference_end_day = dt.datetime(1990, 12, 31)\n",
+    "# Notice we are using one day before the desired period of 1981-01-01 to 1990-12-31.\n",
     "# This is to account for any UTC shifts that might require getting data in a previous or later time.\n",
     "\n",
-    "# Same process for the future period, 100 years later\n",
+    "# Same process for the future period, 100 years later.\n",
     "future_start_day = dt.datetime(2080, 12, 31)\n",
-    "future_end_day = dt.datetime(2091, 1, 1)"
+    "future_end_day = dt.datetime(2090, 12, 31)"
    ]
   },
@@ -369,10 +365,13 @@
     "cat = intake.open_catalog(catalog_name)\n",
     "ds = cat.era5_reanalysis_single_levels.to_dask()\n",
     "\n",
+    "# Set the date to the midnight of the given day.\n",
+    "ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
     "\"\"\"\n",
     "Get the ERA5 data. \n",
     "We will rechunk it to a single chunk to make it compatible with other codes on the platform especially bias-correction.\n",
     "We are also taking the daily min and max temperatures as well as the daily total precipitation.\n",
     "\"\"\"\n",
     "# We will add a wrapper to ensure that the following operations will preserve the original data attributes,\n",
     "# such as units and variable names.\n",
     "with xr.set_options(keep_attrs=True):\n",
     "    ERA5_reference = subset.subset_shape(\n",
     "        ds.sel(time=slice(reference_start_day, reference_end_day)), basin_contour\n",
     "    )\n",
-    "    ERA5_tmin = ERA5_reference[\"t2m\"].resample(time=\"1D\").min().chunk(-1, -1, -1)\n",
-    "    ERA5_tmax = ERA5_reference[\"t2m\"].resample(time=\"1D\").max().chunk(-1, -1, -1)\n",
-    "    ERA5_pr = ERA5_reference[\"tp\"].resample(time=\"1D\").sum().chunk(-1, -1, -1)\n",
+    "    ERA5_tmin = ERA5_reference.t2m.resample(time=\"1D\").min().chunk(time=-1)\n",
+    "    ERA5_tmax = ERA5_reference.t2m.resample(time=\"1D\").max().chunk(time=-1)\n",
+    "    ERA5_pr = ERA5_reference.tp.resample(time=\"1D\").sum().chunk(time=-1)\n",
     "\n",
     "    # Change the units\n",
     "    ERA5_tmin = ERA5_tmin - 273.15  # K to °C\n",
@@ -407,8 +406,9 @@
     "    ERA5_tmax = ERA5_tmax.to_dataset(name=\"tmax\", promote_attrs=True)\n",
     "    ERA5_pr = ERA5_pr.to_dataset(name=\"pr\", promote_attrs=True)\n",
     "\n",
-    "    # Write to disk. Here is where we write to disk and where the notebook will fail if running it from the\n",
-    "    # original location on the server (which is read-only). Please move the notebooks to your writable-workspace.\n",
+    "    # Write to disk.\n",
+    "    # Here is where we write to disk and where the notebook will fail if running it from the original location on the server (which is read-only).\n",
+    "    # Please move the notebooks to your writable-workspace.\n",
     "    ERA5_weather = xr.merge([ERA5_tmin, ERA5_tmax, ERA5_pr])\n",
     "    ERA5_weather.to_netcdf(tmp / \"ERA5_meteo_data.nc\")"
    ]
   },
@@ -430,10 +430,10 @@
    },
    "outputs": [],
    "source": [
-    "# Climate model to use\n",
+    "# Climate model to use.\n",
     "climate_model = \"MIROC6\"\n",
     "\n",
-    "# Get the catalog info from the pangeo dataset, which basically is a list of links to the various products.\n",
+    "# Get the catalog info from the PANGEO dataset, which basically is a list of links to the various products.\n",
     "fsCMIP = gcsfs.GCSFileSystem(token=\"anon\", access=\"read_only\")\n",
     "col = intake.open_esm_datastore(\n",
     "    \"https://storage.googleapis.com/cmip6/pangeo-cmip6.json\"\n",
     ")\n",
@@ -471,6 +471,9 @@
     "        if \"height\" in ds.coords:\n",
     "            ds = ds.drop_vars(\"height\")\n",
     "\n",
+    "        # Set the date to the midnight of the given day.\n",
+    "        ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
+    "\n",
     "        out[exp][variable] = average.average_shape(\n",
     "            ds,\n",
     "            basin_contour,\n",
@@ -505,8 +508,8 @@
    },
    "outputs": [],
    "source": [
-    "# Here we need to make sure that our units are all in the correct format. You can play around with the tools we've seen thus far to explore the units\n",
-    "# and make sure everything is consistent.\n",
+    "# Here we need to make sure that our units are all in the correct format.\n",
+    "# You can play around with the tools we've seen thus far to explore the units and make sure everything is consistent.\n",
     "\n",
     "# Let's start with precipitation:\n",
     "# The CMIP data is a rate rather than an absolute value, so let's get the absolute values:\n",
@@ -541,10 +544,11 @@
    },
    "outputs": [],
    "source": [
-    "# Use xclim utilities (sbda) to give information on the type of window used for the bias correction.\n",
+    "# Use xclim utilities (SDBA) to give information on the type of window used for the bias correction.\n",
     "group_month_window = sdba.utils.Grouper(\"time.dayofyear\", window=15)\n",
     "\n",
-    "# This is an adjusting function. It builds the tool that will perform the corrections.\n",
+    "# This is an adjusting function.\n",
+    "# It builds the tool that will perform the corrections.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_weather.pr,\n",
     "    hist=historical_pr,\n",
@@ -553,17 +557,17 @@
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_precip = Adjustment.adjust(historical_pr, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_precip = Adjustment.adjust(future_pr, interp=\"linear\")\n",
     "\n",
-    "# Ensure that the precipitation is non-negative, which can happen with some climate models\n",
+    "# Ensure that the precipitation is non-negative, which can happen with some climate models.\n",
     "corrected_ref_precip = corrected_ref_precip.where(corrected_ref_precip > 0, 0)\n",
     "corrected_fut_precip = corrected_fut_precip.where(corrected_fut_precip > 0, 0)\n",
     "\n",
-    "# Train the model to find the correction factors for the maximum temperature (tasmax) data\n",
+    "# Train the model to find the correction factors for the maximum temperature (tasmax) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
@@ -572,13 +576,13 @@
     "    ref=ERA5_weather.tmax,\n",
     "    hist=historical_tasmax,\n",
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_tasmax = Adjustment.adjust(historical_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_tasmax = Adjustment.adjust(future_tasmax, interp=\"linear\")\n",
     "\n",
-    "# Train the model to find the correction factors for the minimum temperature (tasmin) data\n",
+    "# Train the model to find the correction factors for the minimum temperature (tasmin) data.\n",
     "Adjustment = sdba.DetrendedQuantileMapping.train(\n",
     "    ref=ERA5_weather.tmin,\n",
     "    hist=historical_tasmin,\n",
@@ -587,10 +591,10 @@
     "    nquantiles=50,\n",
     "    kind=\"+\",\n",
     "    group=group_month_window,\n",
     ")\n",
     "\n",
-    "# Apply the correction factors on the reference period\n",
+    "# Apply the correction factors on the reference period.\n",
     "corrected_ref_tasmin = Adjustment.adjust(historical_tasmin, interp=\"linear\")\n",
     "\n",
-    "# Apply the correction factors on the future period\n",
+    "# Apply the correction factors on the future period.\n",
     "corrected_fut_tasmin = Adjustment.adjust(future_tasmin, interp=\"linear\")"
    ]
   },
@@ -611,7 +615,8 @@
    },
    "outputs": [],
    "source": [
-    "# Convert the reference corrected data into netCDF file. We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
+    "# Convert the reference corrected data into netCDF file.\n",
+    "# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
     "ref_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_ref_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_ref_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_ref_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
     "\n",
-    "# Write to temporary folder\n",
+    "# Write to temporary folder.\n",
     "fn_tmp_ref = tmp / \"reference_dataset_tmp.nc\"\n",
     "ref_dataset.to_netcdf(fn_tmp_ref)\n",
     "\n",
-    "# Convert the future corrected data into netCDF file\n",
+    "# Convert the future corrected data into netCDF file.\n",
     "fut_dataset = xr.merge(\n",
     "    [\n",
     "        corrected_fut_precip.to_dataset(name=\"pr\"),\n",
     "        corrected_fut_tasmax.to_dataset(name=\"tasmax\"),\n",
     "        corrected_fut_tasmin.to_dataset(name=\"tasmin\"),\n",
     "    ]\n",
     ")\n",
-    "# Write to temporary folder\n",
+    "# Write to temporary folder.\n",
     "with ProgressBar():\n",
     "    fn_tmp_fut = tmp / \"future_dataset_tmp.nc\"\n",
     "    fut_dataset.to_netcdf(fn_tmp_fut)\n",
     "\n",
@@ -664,19 +669,18 @@
    },
    "outputs": [],
    "source": [
-    "# Define the hydrological response unit. We can use the geographic information we gathered previously to\n",
-    "# populate the fields for the HRU.\n",
-    "hru = {}\n",
-    "hru = dict(\n",
-    "    area=all_properties[\"area\"],\n",
-    "    elevation=all_properties[\"elevation\"],\n",
-    "    latitude=all_properties[\"latitude\"],\n",
-    "    longitude=all_properties[\"longitude\"],\n",
-    "    hru_type=\"land\",\n",
-    ")\n",
+    "# Define the hydrological response unit.\n",
+    "# We can use the geographic information we gathered previously to populate the fields for the HRU.\n",
+    "hru = {\n",
+    "    \"area\": all_properties[\"area\"],\n",
+    "    \"elevation\": all_properties[\"elevation\"],\n",
+    "    \"latitude\": all_properties[\"latitude\"],\n",
+    "    \"longitude\": all_properties[\"longitude\"],\n",
+    "    \"hru_type\": \"land\",\n",
+    "}\n",
     "\n",
-    "# Establish the start date for the calibration. This is set in the model configuration, so the calibrator\n",
-    "# will simply execute the model which has been pre-configured to run on this period.\n",
+    "# Establish the start date for the calibration.\n",
+    "# This is set in the model configuration, so the calibrator will simply execute the model which has been pre-configured to run on this period.\n",
     "start_date = dt.datetime(1981, 1, 1)\n",
     "end_date = dt.datetime(1985, 12, 31)\n",
     "\n",
@@ -690,8 +694,8 @@
     "    \"PRECIP\": \"pr\",\n",
     "}\n",
     "\n",
-    "# The data keywords necessary to indicate the elevation, latitude and longitude of the ERA5 forcing data. Here\n",
-    "# we use the information for the basin average as the ERA5 data is averaged on the watershed.\n",
+    "# The data keywords necessary to indicate the elevation, latitude and longitude of the ERA5 forcing data.\n",
+    "# Here we use the information for the basin average as the ERA5 data is averaged on the watershed.\n",
     "data_kwds = {\n",
     "    \"ALL\": {\n",
     "        \"elevation\": hru[\"elevation\"],\n",
     "        \"latitude\": hru[\"latitude\"],\n",
     "        \"longitude\": hru[\"longitude\"],\n",
     "    }\n",
     "}\n",
     "\n",
-    "# Give a name to the simulation\n",
+    "# Give a name to the simulation.\n",
     "run_name = \"Paper_example_simulation\"\n",
     "\n",
-    "# Setup the gauge object that includes meteorological data from ERA5\n",
+    "# Set up the gauge object that includes meteorological data from ERA5.\n",
     "gauge = [\n",
     "    rc.Gauge.from_nc(\n",
     "        tmp\n",
     "        / \"ERA5_meteo_data.nc\",  # Path to the ERA5 file containing all three variables\n",
     "        data_type=data_type,\n",
     "        alt_names=alt_names,\n",
     "        data_kwds=data_kwds,\n",
     "    )\n",
     "]\n",
     "\n",
     "# Read the streamflow from the HYSETS catchment data for this basin\n",
     "discharge_data = [rc.ObservationData.from_nc(streamflow_file, alt_names=\"discharge\")]\n",
     "\n",
-    "# Which evaluation metric do we want to use for calibration. Raven will return this by default after each run,\n",
-    "# and the optimizer will read it directly to calibrate.\n",
+    "# Which evaluation metric do we want to use for calibration.\n",
+    "# Raven will return this by default after each run, and the optimizer will read it directly to calibrate.\n",
     "eval_metrics = (\"NASH_SUTCLIFFE\",)\n",
     "\n",
     "# Build the model configuration according to user preferences and inputs\n",
@@ -751,12 +755,14 @@
    },
    "outputs": [],
    "source": [
-    "# In order to calibrate your model, you need to give the lower and higher bounds of the model. In this case,\n",
-    "# we are passing the boundaries for a GR4JCN, but it's important to change them, if you are using another model.\n",
+    "# In order to calibrate your model, you need to give the lower and higher bounds of the model.\n",
+    "# In this case, we are passing the boundaries for a GR4JCN, but it's important to change them, if you are using another model.\n",
     "low = (0.01, -15.0, 10.0, 0.0, 1.0, 0.0)\n",
     "high = (2.5, 10.0, 700.0, 7.0, 30.0, 1.0)\n",
     "\n",
-    "# Random seed. We will provide one for consistency purposes, but operationnaly this should not be provided.\n",
+    "# Random seed.\n",
+    "# We will provide one for consistency purposes, but operationally this should not be provided.\n",
+    "# FIXME: This will change in numpy v2.0, so we will need to update this code then.\n",
     "random_seed = 42\n",
     "np.random.seed(random_seed)\n",
     "\n",
     "spot_setup = SpotSetup(\n",
     "    config=model_config,\n",
     "    low=low,\n",
     "    high=high,\n",
     ")\n",
     "\n",
-    "# Maximum number of model evaluations. We only use 200 here to keep the computation time as low as possible,\n",
-    "# but you will want to increase this for operational use, perhaps to 2000-5000 depending on the model.\n",
+    "# Maximum number of model evaluations.\n",
+    "# We only use 200 here to keep the computation time as low as possible, but you will want to increase this for operational use, perhaps to 2000-5000 depending on the model.\n",
     "max_iterations = 200\n",
     "\n",
-    "# Setup the spotpy sampler with the method, the setup configuration, a run name and other options. Please refer\n",
-    "# to the spotpy documentation for more options. We recommend sticking to this format for efficiency of most\n",
-    "# applications. Here we use DDS as the optimization algorithm. More are available: see the Spotpy documentation\n",
-    "# for more information. Here, DDS is used as it is powerful and particularly useful for optimizations with small\n",
-    "# evaluation budgets. For more details on DDS, see:\n",
+    "# Set up the spotpy sampler with the method, the setup configuration, a run name and other options.\n",
+    "# Please refer to the spotpy documentation for more options.\n",
+    "# We recommend sticking to this format for efficiency of most applications.\n",
+    "# Here we use DDS as the optimization algorithm. More are available: see the Spotpy documentation for more information.\n",
+    "# Here, DDS is used as it is powerful and particularly useful for optimizations with small evaluation budgets.\n",
+    "# For more details on DDS, see:\n",
     "#\n",
-    "# Tolson, B.A. and Shoemaker, C.A., 2007. Dynamically dimensioned search algorithm for computationally efficient watershed model calibration. Water\n",
-    "# Resources Research, 43(1)\n",
+    "# Tolson, B.A. and Shoemaker, C.A., 2007. Dynamically dimensioned search algorithm for computationally efficient watershed model calibration. Water Resources Research, 43(1)\n",
     "sampler = spotpy.algorithms.dds(\n",
     "    spot_setup, dbname=\"RAVEN_model_run\", dbformat=\"ram\", save_sim=False\n",
     ")\n",
     "\n",
-    "# Launch the actual optimization. Multiple trials can be launched, where the entire process is repeated and\n",
-    "# the best overall value from all trials is returned.\n",
+    "# Launch the actual optimization.\n",
+    "# Multiple trials can be launched, where the entire process is repeated and the best overall value from all trials is returned.\n",
     "sampler.sample(max_iterations, trials=1)\n",
     "\n",
-    "# Get the model diagnostics\n",
+    "# Get the model diagnostics.\n",
     "diag = spot_setup.diagnostics\n",
     "\n",
-    "# Get all the values of each iteration\n",
+    "# Get all the values of each iteration.\n",
     "results = sampler.getdata()\n",
     "\n",
-    "# Get the raw resutlts directly in an array\n",
+    "# Get the raw results directly in an array.\n",
     "bestindex, bestobjfun = spotpy.analyser.get_maxlikeindex(\n",
     "    results\n",
     ")  # Want to get the MAX NSE (change for min for RMSE).\n",
     "best_model_run = list(\n",
     "    results[bestindex][0]\n",
     ")  # Get the parameter set returning the best NSE.\n",
     "optimized_parameters = best_model_run[\n",
     "    1:-1\n",
     "]  # Remove the NSE value (position 0) and the ID at the last position to get the actual parameter set.\n",
@@ -834,10 +840,10 @@
     "\n",
     "sim_output = Emulator(config=model_validation).run()\n",
     "\n",
-    "# Get validation NSE (note we are counting the first year without warm-up)\n",
+    "# Get validation NSE (note we are counting the first year without warm-up).\n",
     "NSE = sim_output.diagnostics[\"DIAG_NASH_SUTCLIFFE\"]\n",
     "\n",
-    "# Plot the model output\n",
+    "# Plot the model output.\n",
     "sim_output.hydrograph.q_sim.plot(color=\"blue\", label=\"Simulation\")\n",
     "sim_output.hydrograph.q_obs.plot(color=\"black\", label=\"Observation\")\n",
     "plt.legend()\n",
@@ -866,11 +872,11 @@
    },
    "outputs": [],
    "source": [
-    "# Setup a gauge for Raven to read-in the reference climate data, just like for ERA5\n",
+    "# Set up a gauge for Raven to read-in the reference climate data, just like for ERA5.\n",
     "gauge_ref = [\n",
     "    rc.Gauge.from_nc(\n",
     "        tmp\n",
-    "        / \"reference_dataset.nc\",  # Path to the CMIP6 model reference data netcdf file\n",
+    "        / \"reference_dataset.nc\",  # Path to the CMIP6 model reference data netcdf file.\n",
     "        data_type=data_type,\n",
     "        alt_names=alt_names,\n",
     "        data_kwds=data_kwds,\n",
@@ -881,15 +887,15 @@
     "model_config_reference = model_validation.duplicate(\n",
     "    Gauge=gauge_ref,\n",
     "    StartDate=reference_start_day\n",
     "    + dt.timedelta(days=1),  # Add a day here to account for the UTC lag in ERA5.\n",
     "    EndDate=reference_end_day,\n",
     ")\n",
     "\n",
     "# Run the model from the configuration and get the outputs.\n",
     "ref_output = Emulator(config=model_config_reference).run()\n",
     "\n",
-    "# Plot the model output. Note that both simulations should have similar hydrological\n",
-    "# regime but day-to-day variability is not expected to match.\n",
+    "# Plot the model output.\n",
+    "# Note that both simulations should have similar hydrological regime but day-to-day variability is not expected to match.\n",
     "ref_output.hydrograph.q_sim.plot(color=\"blue\", label=\"Reference period simulation\")\n",
     "ref_output.hydrograph.q_obs.plot(color=\"black\", label=\"Observation\")\n",
     "plt.legend()\n",
@@ -914,7 +920,7 @@
    },
    "outputs": [],
    "source": [
-    "# Setup a gauge for Raven to read-in the future climate data, just like for the reference data\n",
+    "# Set up a gauge for Raven to read-in the future climate data, just like for the reference data.\n",
     "gauge_fut = [\n",
     "    rc.Gauge.from_nc(\n",
     "        tmp / \"future_dataset.nc\",  # Path to the CMIP6 model reference data netcdf file\n",
     "        data_type=data_type,\n",
     "        alt_names=alt_names,\n",
     "        data_kwds=data_kwds,\n",
     "    )\n",
     "]\n",
     "\n",
     "model_config_future = model_config_reference.duplicate(\n",
     "    Gauge=gauge_fut,\n",
     "    StartDate=future_start_day,\n",
     "    EndDate=future_end_day,\n",
     ")\n",
     "\n",
@@ -935,7 +941,7 @@
     "# Run the model and get the outputs and hydrographs.\n",
     "fut_output = Emulator(config=model_config_future).run()\n",
     "\n",
-    "# Plot the model output\n",
+    "# Plot the model output.\n",
     "fut_output.hydrograph.q_sim.plot(color=\"blue\", label=\"Future simulation\")\n",
     "plt.legend()\n",
     "plt.title(\"Future period\")\n",
@@ -972,7 +978,7 @@
     "reference_flows = ref_output.hydrograph.q_sim.groupby(\"time.dayofyear\").mean()\n",
     "future_flows = fut_output.hydrograph.q_sim.groupby(\"time.dayofyear\").mean()\n",
     "\n",
-    "# Plot the model output\n",
+    "# Plot the model output.\n",
     "observed_flows.plot(color=\"black\", label=\"Observation\", x=\"dayofyear\")\n",
     "simulated_flows.plot(color=\"green\", label=\"Simulation\", x=\"dayofyear\")\n",
     "reference_flows.plot(color=\"blue\", label=\"Reference\", x=\"dayofyear\")\n",