update notebooks to work with changes to dask chunking
Zeitsperre committed Oct 1, 2024
1 parent effc01e commit 9dc9fe7
Showing 2 changed files with 155 additions and 144 deletions.
99 changes: 52 additions & 47 deletions docs/notebooks/08_Getting_and_bias_correcting_CMIP6_data.ipynb
@@ -53,7 +53,6 @@
"import xclim\n",
"import xclim.sdba as sdba\n",
"from clisops.core import average, subset\n",
"\n",
"from ravenpy.utilities.testdata import get_file\n",
"\n",
"tmp = Path(tempfile.mkdtemp())"
@@ -85,10 +84,10 @@
},
"outputs": [],
"source": [
"# We get the basin contour for testing on a server. You can replace the getfile method by a string containing the path\n",
"# to your own geojson\n",
"# We get the basin contour for testing on a server.\n",
"# You can replace the getfile method by a string containing the path to your own geojson.\n",
"\n",
"# Get basin contour\n",
"# Get basin contour.\n",
"basin_contour = get_file(\"notebook_inputs/input.geojson\")\n",
"\n",
"reference_start_day = dt.datetime(1980, 12, 31)\n",
@@ -158,15 +157,16 @@
},
"outputs": [],
"source": [
"# Prepare the filesystem that allows reading data. Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
"# Prepare the filesystem that allows reading data.\n",
"# Data is read on the Google Cloud Services, which host a copy of the CMIP6 (and other) data.\n",
"fsCMIP = gcsfs.GCSFileSystem(token=\"anon\", access=\"read_only\")\n",
"\n",
"# Get the catalog info from the pangeo dataset, which basically is a list of links to the various products.\n",
"# Get the catalog info from the PANGEO dataset, which basically is a list of links to the various products.\n",
"col = intake.open_esm_datastore(\n",
" \"https://storage.googleapis.com/cmip6/pangeo-cmip6.json\"\n",
")\n",
"\n",
"# Print the contents of the catalog, so we can see the classification system\n",
"# Print the contents of the catalog, so we can see the classification system.\n",
"display(col)"
]
},
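
Note: intake.open_esm_datastore returns an intake-esm catalog whose .df attribute is a plain pandas DataFrame with one row per zarr store. A minimal sketch of how to inspect it (same catalog URL as in the cell above):

    import intake

    # Open the Pangeo CMIP6 catalog and look at its size and facets.
    col = intake.open_esm_datastore(
        "https://storage.googleapis.com/cmip6/pangeo-cmip6.json"
    )
    print(len(col.df))              # number of catalogued zarr stores
    print(col.df.columns.tolist())  # source_id, experiment_id, variable_id, zstore, ...
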
@@ -185,7 +185,8 @@
},
"outputs": [],
"source": [
"# Get the list of models. Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
"# Get the list of models.\n",
"# Replace \"source_id\" with any of the catalog categories (table_id, activity_id, variable_id, etc.)\n",
"list(col.df.source_id.unique())"
]
},
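
Note: the same pattern lists any other facet of the catalog. A small sketch reusing the col object from the cells above:

    # List the available experiments and variables in the same way.
    print(list(col.df.experiment_id.unique()))
    print(list(col.df.variable_id.unique()))
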
@@ -225,11 +226,10 @@
" member_id=\"r1i1p1f1\",\n",
" source_id=climate_model,\n",
")\n",
"col_subset = col.search(\n",
" require_all_on=[\"source_id\"], **query\n",
") # Command that will return the filtered list\n",
"# Return the filtered list.\n",
"col_subset = col.search(require_all_on=[\"source_id\"], **query)\n",
"\n",
"# Show the filtered list:\n",
"# Show the filtered list.\n",
"display(col_subset.df)"
]
},
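
Note: col.search filters the catalog on the facets given in query, and require_all_on=["source_id"] keeps only models for which every requested combination is available. A hedged sketch with the facets written out; the values below are illustrative, since the full query dict is not shown in this hunk:

    # Hypothetical query: facet names come from the catalog columns,
    # the values are examples rather than those used in the notebook.
    query = dict(
        experiment_id="historical",
        table_id="day",
        variable_id=["tasmin", "tasmax", "pr"],
        member_id="r1i1p1f1",
        source_id="MIROC6",
    )
    col_subset = col.search(require_all_on=["source_id"], **query)
    display(col_subset.df)
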
@@ -249,7 +249,7 @@
},
"outputs": [],
"source": [
"# Get the object locator object\n",
"# Get the object locator object.\n",
"mapper = fsCMIP.get_mapper(col_subset.df.zstore[0])"
]
},
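
Note: col_subset.df.zstore[0] holds the Google Cloud path of the first matching zarr store, and get_mapper wraps it as a key-value mapping that xarray can open directly (see the next cell). A short sketch:

    # Inspect the cloud path of the first search result, then build the mapper.
    print(col_subset.df.zstore[0])  # a path of the form "gs://cmip6/CMIP6/..."
    mapper = fsCMIP.get_mapper(col_subset.df.zstore[0])
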
@@ -277,17 +277,20 @@
},
"outputs": [],
"source": [
"# Get the CMIP6 data from Google Cloud and read it in memory using xarray. This is done via \"lazy loading\" and is not actually reading the data in memory\n",
"# yet, but is keeping track of what it will need to get, eventually.\n",
"# Get the CMIP6 data from Google Cloud and read it in memory using xarray.\n",
"# This is done via \"lazy loading\" and is not actually reading the data in memory yet, but is keeping track of what it will need to get, eventually.\n",
"ds = xr.open_zarr(mapper, consolidated=True)\n",
"\n",
"# Convert to numpy.datetime64 object to be compatbile\n",
"if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
" ds = ds.convert_calendar(\"standard\")\n",
"# Convert to numpy.datetime64 object for compatibility.\n",
"ds = ds.convert_calendar(\"standard\")\n",
"\n",
"# Extract only the dates that we really want. Again, this is done via lazy loading, and is not actually using memory at this point.\n",
"# Extract only the dates that we really want.\n",
"# Again, this is done via lazy loading, and is not actually using memory at this point.\n",
"ds = ds.sel(time=slice(reference_start_day, reference_end_day))\n",
"\n",
"# Set the date to the midnight of the given day.\n",
"ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
"\n",
"# Use the clisops subsetting tools to extract the data for the watershed boundaries and take the spatial average\n",
"ds = average.average_shape(ds, basin_contour)\n",
"\n",
@@ -322,14 +325,16 @@
" with xr.set_options(keep_attrs=True):\n",
" ds = xr.open_zarr(mapper, consolidated=True)\n",
"\n",
" # Convert to numpy.datetime64 object to be compatbile\n",
" if type(ds.time[0].values) is not type(np.datetime64(\"1980-01-01\")):\n",
" ds = ds.convert_calendar(\"standard\")\n",
" # Convert to numpy.datetime64 object for compatibility.\n",
" ds = ds.convert_calendar(\"standard\")\n",
"\n",
" # Compute average over region\n",
" # Set the date to the midnight of the given day.\n",
" ds = ds.assign_coords(time=ds.time.dt.floor(\"D\"))\n",
"\n",
" # Compute the average over region.\n",
" out = average.average_shape(ds.sel(time=slice(start, end)), geometry)\n",
"\n",
" # Convert geometry variables into attributes\n",
" # Convert geometry variables into attributes.\n",
" attrs = {\n",
" key: out[key].values.item()\n",
" for key in out.coords\n",
@@ -431,9 +436,15 @@
" ERA5_reference = subset.subset_shape(\n",
" ds.sel(time=slice(reference_start_day, reference_end_day)), basin_contour\n",
" ).mean({\"latitude\", \"longitude\"})\n",
" ERA5_tmin = ERA5_reference[\"t2m\"].resample(time=\"1D\").min().chunk(-1, -1, -1)\n",
" ERA5_tmax = ERA5_reference[\"t2m\"].resample(time=\"1D\").max().chunk(-1, -1, -1)\n",
" ERA5_pr = ERA5_reference[\"tp\"].resample(time=\"1D\").sum().chunk(-1, -1, -1)"
" ERA5_tmin = (\n",
" ERA5_reference.t2m.resample(time=\"1D\")\n",
" .min()\n",
" .chunk(\n",
" time=-1,\n",
" )\n",
" )\n",
" ERA5_tmax = ERA5_reference.t2m.resample(time=\"1D\").max().chunk(time=-1)\n",
" ERA5_pr = ERA5_reference.tp.resample(time=\"1D\").sum().chunk(time=-1)"
]
},
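
Note: this cell carries the change named in the commit title. The positional call .chunk(-1, -1, -1) is replaced by the keyword form .chunk(time=-1), presumably to follow changes in how dask/xarray accept chunk specifications; the keyword form collapses the whole time axis into a single dask chunk, which xclim's sdba routines generally require in order to compute quantiles along time. A minimal sketch of the keyword form:

    import numpy as np
    import xarray as xr

    # Toy array chunked along "time" in blocks of 100 (requires dask).
    da = xr.DataArray(np.random.rand(365), dims="time").chunk(time=100)
    print(da.chunks)                 # ((100, 100, 100, 65),)

    # Keyword form, as used above: a single chunk along "time".
    print(da.chunk(time=-1).chunks)  # ((365,),)
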
{
@@ -455,8 +466,8 @@
},
"outputs": [],
"source": [
"# Here we need to make sure that our units are all in the correct format. You can play around with the tools we've seen thus far to explore the units\n",
"# and make sure everything is consistent.\n",
"# Here we need to make sure that our units are all in the correct format.\n",
"# You can play around with the tools we've seen thus far to explore the units and make sure everything is consistent.\n",
"\n",
"# Let's start with precipitation:\n",
"ERA5_pr = xclim.core.units.convert_units_to(ERA5_pr, \"mm\", context=\"hydro\")\n",
@@ -497,25 +508,25 @@
},
"outputs": [],
"source": [
"# Use xclim utilities (sbda) to give information on the type of window used for the bias correction.\n",
"# Use xclim utilities (SDBA) to give information on the type of window used for the bias correction.\n",
"group_month_window = sdba.utils.Grouper(\"time.dayofyear\", window=15)\n",
"\n",
"# This is an adjusting function. It builds the tool that will perform the corrections.\n",
"Adjustment = sdba.DetrendedQuantileMapping.train(\n",
" ref=ERA5_pr, hist=historical_pr, nquantiles=50, kind=\"+\", group=group_month_window\n",
")\n",
"\n",
"# Apply the correction factors on the reference period\n",
"# Apply the correction factors on the reference period.\n",
"corrected_ref_precip = Adjustment.adjust(historical_pr, interp=\"linear\")\n",
"\n",
"# Apply the correction factors on the future period\n",
"# Apply the correction factors on the future period.\n",
"corrected_fut_precip = Adjustment.adjust(future_pr, interp=\"linear\")\n",
"\n",
"# Ensure that the precipitation is non-negative, which can happen with some climate models\n",
"# Ensure that the precipitation is non-negative, which can happen with some climate models.\n",
"corrected_ref_precip = corrected_ref_precip.where(corrected_ref_precip > 0, 0)\n",
"corrected_fut_precip = corrected_fut_precip.where(corrected_fut_precip > 0, 0)\n",
"\n",
"# Train the model to find the correction factors for the maximum temperature (tasmax) data\n",
"# Train the model to find the correction factors for the maximum temperature (tasmax) data.\n",
"Adjustment = sdba.DetrendedQuantileMapping.train(\n",
" ref=ERA5_tmax,\n",
" hist=historical_tasmax,\n",
Expand All @@ -524,13 +535,13 @@
" group=group_month_window,\n",
")\n",
"\n",
"# Apply the correction factors on the reference period\n",
"# Apply the correction factors on the reference period.\n",
"corrected_ref_tasmax = Adjustment.adjust(historical_tasmax, interp=\"linear\")\n",
"\n",
"# Apply the correction factors on the future period\n",
"# Apply the correction factors on the future period.\n",
"corrected_fut_tasmax = Adjustment.adjust(future_tasmax, interp=\"linear\")\n",
"\n",
"# Train the model to find the correction factors for the minimum temperature (tasmin) data\n",
"# Train the model to find the correction factors for the minimum temperature (tasmin) data.\n",
"Adjustment = sdba.DetrendedQuantileMapping.train(\n",
" ref=ERA5_tmin,\n",
" hist=historical_tasmin,\n",
@@ -561,7 +572,8 @@
},
"outputs": [],
"source": [
"# Convert the reference corrected data into netCDF file. We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
"# Convert the reference corrected data into netCDF file.\n",
"# We will then apply a special code to remove a dimension in the dataset to make it applicable to the RAVEN models.\n",
"ref_dataset = xr.merge(\n",
" [\n",
" corrected_ref_precip.to_dataset(name=\"pr\"),\n",
Expand All @@ -570,11 +582,11 @@
" ]\n",
")\n",
"\n",
"# Write to temporary folder\n",
"# Write to temporary folder.\n",
"fn_ref = tmp / \"reference_dataset.nc\"\n",
"ref_dataset.to_netcdf(fn_ref)\n",
"\n",
"# Convert the future corrected data into netCDF file\n",
"# Convert the future corrected data into netCDF file.\n",
"fut_dataset = xr.merge(\n",
" [\n",
" corrected_fut_precip.to_dataset(name=\"pr\"),\n",
@@ -610,13 +622,6 @@
"# Compare it to the future precipitation without bias-correction.\n",
"future_pr.plot()"
]
},
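
Note on the netCDF cell above: the "special code to remove a dimension" is not shown in this hunk. clisops' average_shape typically returns the averaged series with an extra one-element geometry dimension, so the removal is presumably along these lines (hypothetical sketch, dimension name assumed):

    # Hypothetical: drop the one-element geometry dimension added by
    # average_shape, if present, before handing the files to Raven.
    if "geom" in ref_dataset.dims:
        ref_dataset = ref_dataset.isel(geom=0, drop=True)
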
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {