diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index b3b151e8..49e60ecf 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -40,7 +40,6 @@ jobs: python-version: "${{ matrix.python-version }}" venv-id: "tests-${{ runner.os }}" poetry-dependency-install-flags: "--all-extras --only 'main,tests,coverage'" - # TODO: change this probably to capture coverage from non-regression tests, ok for now - name: Run regression tests relevant for coverage run: | poetry run python scripts/write-config.py diff --git a/LICENCES_SOURCES.md b/LICENCES_SOURCES.md new file mode 100644 index 00000000..92bf8051 --- /dev/null +++ b/LICENCES_SOURCES.md @@ -0,0 +1,46 @@ +Notes on the licences of the sources we use. +This is not checked with CI or anything, so may not be up to date. + +AGAGE: + +- data policy: https://agage.mit.edu/data/use-agage-data + - offer co-authorship + - contact Paul.Krummel@csiro.au to check + + +NOAA HATS: + +- https://gml.noaa.gov/hats/hats_datause.html + - reciprocity, otherwise can be used + + +NOAA CCG: + +- https://gml.noaa.gov/ccgg/data/datause.html + - reciprocity, otherwise can be used + + +EPICA: + +- https://doi.pangaea.de/10.1594/PANGAEA.552232 + - CC BY 3.0, can build upon + - https://creativecommons.org/licenses/by/3.0/ + +Law Dome: + +- https://data.csiro.au/collection/csiro%3A37077v2 + - CC BY 4.0 + - https://creativecommons.org/licenses/by/4.0/ + +NEEM: + +- https://doi.pangaea.de/10.1594/PANGAEA.899039 + - CC BY 4.0 + - https://creativecommons.org/licenses/by/4.0/ + +HadCRUT5: + +- https://www.metoffice.gov.uk/hadobs/hadcrut5/ + - Open Government 3 licence + - https://www.nationalarchives.gov.uk/doc/open-government-licence/version/3/ + - can use it for this, no worries diff --git a/dev-config.yaml b/dev-config.yaml index f775b017..24cccc49 100644 --- a/dev-config.yaml +++ b/dev-config.yaml @@ -48,7 +48,99 @@ calculate_n2o_monthly_fifteen_degree_pieces: processed_bin_averages_file: data/interim/n2o/n2o_observational-network_bin-averages.csv seasonality_allyears_fifteen_degree_monthly_file: data/interim/n2o/n2o_seasonality_fifteen-degree_allyears-monthly.nc step_config_id: only +calculate_sf6_like_monthly_fifteen_degree_pieces: +- allow_poleward_extension: true + gas: sf6 + global_annual_mean_allyears_file: data/interim/sf6/sf6_global-annual-mean_allyears.nc + global_annual_mean_allyears_monthly_file: data/interim/sf6/sf6_global-annual-mean_allyears-monthly.nc + lat_gradient_n_eofs_to_use: 1 + latitudinal_gradient_allyears_pcs_eofs_file: data/interim/sf6/sf6_allyears-lat-gradient-eofs-pcs.nc + latitudinal_gradient_fifteen_degree_allyears_monthly_file: data/interim/sf6/sf6_latitudinal-gradient_fifteen-degree_allyears-monthly.nc + latitudinal_gradient_pc0_total_emissions_regression_file: data/interim/sf6/sf6_pc0-total-emissions-regression.yaml + observational_network_global_annual_mean_file: data/interim/sf6/sf6_observational-network_global-annual-mean.nc + observational_network_interpolated_file: data/interim/sf6/sf6_observational-network_interpolated.nc + observational_network_latitudinal_gradient_eofs_file: data/interim/sf6/sf6_observational-network_latitudinal-gradient-eofs.nc + observational_network_seasonality_file: data/interim/sf6/sf6_observational-network_seasonality.nc + pre_industrial: + source: Guessing from reading M2017 + value: + - 0.0 + - ppt + year: 1950 + processed_all_data_with_bins_file: data/interim/sf6/sf6_observational-network_all-data-with-bin-information.csv + processed_bin_averages_file: 
data/interim/sf6/sf6_observational-network_bin-averages.csv + seasonality_allyears_fifteen_degree_monthly_file: data/interim/sf6/sf6_seasonality_fifteen-degree_allyears-monthly.nc + step_config_id: sf6 +- allow_poleward_extension: true + gas: cfc11 + global_annual_mean_allyears_file: data/interim/cfc11/cfc11_global-annual-mean_allyears.nc + global_annual_mean_allyears_monthly_file: data/interim/cfc11/cfc11_global-annual-mean_allyears-monthly.nc + lat_gradient_n_eofs_to_use: 1 + latitudinal_gradient_allyears_pcs_eofs_file: data/interim/cfc11/cfc11_allyears-lat-gradient-eofs-pcs.nc + latitudinal_gradient_fifteen_degree_allyears_monthly_file: data/interim/cfc11/cfc11_latitudinal-gradient_fifteen-degree_allyears-monthly.nc + latitudinal_gradient_pc0_total_emissions_regression_file: data/interim/cfc11/cfc11_pc0-total-emissions-regression.yaml + observational_network_global_annual_mean_file: data/interim/cfc11/cfc11_observational-network_global-annual-mean.nc + observational_network_interpolated_file: data/interim/cfc11/cfc11_observational-network_interpolated.nc + observational_network_latitudinal_gradient_eofs_file: data/interim/cfc11/cfc11_observational-network_latitudinal-gradient-eofs.nc + observational_network_seasonality_file: data/interim/cfc11/cfc11_observational-network_seasonality.nc + pre_industrial: + source: Guessing from reading M2017 + value: + - 0.0 + - ppt + year: 1950 + processed_all_data_with_bins_file: data/interim/cfc11/cfc11_observational-network_all-data-with-bin-information.csv + processed_bin_averages_file: data/interim/cfc11/cfc11_observational-network_bin-averages.csv + seasonality_allyears_fifteen_degree_monthly_file: data/interim/cfc11/cfc11_seasonality_fifteen-degree_allyears-monthly.nc + step_config_id: cfc11 +- allow_poleward_extension: true + gas: cfc12 + global_annual_mean_allyears_file: data/interim/cfc12/cfc12_global-annual-mean_allyears.nc + global_annual_mean_allyears_monthly_file: data/interim/cfc12/cfc12_global-annual-mean_allyears-monthly.nc + lat_gradient_n_eofs_to_use: 1 + latitudinal_gradient_allyears_pcs_eofs_file: data/interim/cfc12/cfc12_allyears-lat-gradient-eofs-pcs.nc + latitudinal_gradient_fifteen_degree_allyears_monthly_file: data/interim/cfc12/cfc12_latitudinal-gradient_fifteen-degree_allyears-monthly.nc + latitudinal_gradient_pc0_total_emissions_regression_file: data/interim/cfc12/cfc12_pc0-total-emissions-regression.yaml + observational_network_global_annual_mean_file: data/interim/cfc12/cfc12_observational-network_global-annual-mean.nc + observational_network_interpolated_file: data/interim/cfc12/cfc12_observational-network_interpolated.nc + observational_network_latitudinal_gradient_eofs_file: data/interim/cfc12/cfc12_observational-network_latitudinal-gradient-eofs.nc + observational_network_seasonality_file: data/interim/cfc12/cfc12_observational-network_seasonality.nc + pre_industrial: + source: Guessing from reading M2017 + value: + - 0.0 + - ppt + year: 1940 + processed_all_data_with_bins_file: data/interim/cfc12/cfc12_observational-network_all-data-with-bin-information.csv + processed_bin_averages_file: data/interim/cfc12/cfc12_observational-network_bin-averages.csv + seasonality_allyears_fifteen_degree_monthly_file: data/interim/cfc12/cfc12_seasonality_fifteen-degree_allyears-monthly.nc + step_config_id: cfc12 +- allow_poleward_extension: true + gas: hfc134a + global_annual_mean_allyears_file: data/interim/hfc134a/hfc134a_global-annual-mean_allyears.nc + global_annual_mean_allyears_monthly_file: 
data/interim/hfc134a/hfc134a_global-annual-mean_allyears-monthly.nc + lat_gradient_n_eofs_to_use: 1 + latitudinal_gradient_allyears_pcs_eofs_file: data/interim/hfc134a/hfc134a_allyears-lat-gradient-eofs-pcs.nc + latitudinal_gradient_fifteen_degree_allyears_monthly_file: data/interim/hfc134a/hfc134a_latitudinal-gradient_fifteen-degree_allyears-monthly.nc + latitudinal_gradient_pc0_total_emissions_regression_file: data/interim/hfc134a/hfc134a_pc0-total-emissions-regression.yaml + observational_network_global_annual_mean_file: data/interim/hfc134a/hfc134a_observational-network_global-annual-mean.nc + observational_network_interpolated_file: data/interim/hfc134a/hfc134a_observational-network_interpolated.nc + observational_network_latitudinal_gradient_eofs_file: data/interim/hfc134a/hfc134a_observational-network_latitudinal-gradient-eofs.nc + observational_network_seasonality_file: data/interim/hfc134a/hfc134a_observational-network_seasonality.nc + pre_industrial: + source: Guessing from reading M2017 + value: + - 0.0 + - ppt + year: 1990 + processed_all_data_with_bins_file: data/interim/hfc134a/hfc134a_observational-network_all-data-with-bin-information.csv + processed_bin_averages_file: data/interim/hfc134a/hfc134a_observational-network_bin-averages.csv + seasonality_allyears_fifteen_degree_monthly_file: data/interim/hfc134a/hfc134a_seasonality_fifteen-degree_allyears-monthly.nc + step_config_id: hfc134a ci: false +compile_historical_emissions: +- complete_historical_emissions_file: data/processed/historical_emissions + step_config_id: only crunch_grids: - fifteen_degree_monthly_file: data/interim/co2/co2_fifteen-degree_monthly.nc gas: co2 @@ -68,6 +160,30 @@ crunch_grids: gmnhsh_mean_monthly_file: data/interim/n2o/n2o_gmnhsh-mean_monthly.nc half_degree_monthly_file: data/interim/n2o/n2o_half-degree_monthly.nc step_config_id: n2o +- fifteen_degree_monthly_file: data/interim/sf6/sf6_fifteen-degree_monthly.nc + gas: sf6 + gmnhsh_mean_annual_file: data/interim/sf6/sf6_gmnhsh-mean_annual.nc + gmnhsh_mean_monthly_file: data/interim/sf6/sf6_gmnhsh-mean_monthly.nc + half_degree_monthly_file: data/interim/sf6/sf6_half-degree_monthly.nc + step_config_id: sf6 +- fifteen_degree_monthly_file: data/interim/cfc11/cfc11_fifteen-degree_monthly.nc + gas: cfc11 + gmnhsh_mean_annual_file: data/interim/cfc11/cfc11_gmnhsh-mean_annual.nc + gmnhsh_mean_monthly_file: data/interim/cfc11/cfc11_gmnhsh-mean_monthly.nc + half_degree_monthly_file: data/interim/cfc11/cfc11_half-degree_monthly.nc + step_config_id: cfc11 +- fifteen_degree_monthly_file: data/interim/cfc12/cfc12_fifteen-degree_monthly.nc + gas: cfc12 + gmnhsh_mean_annual_file: data/interim/cfc12/cfc12_gmnhsh-mean_annual.nc + gmnhsh_mean_monthly_file: data/interim/cfc12/cfc12_gmnhsh-mean_monthly.nc + half_degree_monthly_file: data/interim/cfc12/cfc12_half-degree_monthly.nc + step_config_id: cfc12 +- fifteen_degree_monthly_file: data/interim/hfc134a/hfc134a_fifteen-degree_monthly.nc + gas: hfc134a + gmnhsh_mean_annual_file: data/interim/hfc134a/hfc134a_gmnhsh-mean_annual.nc + gmnhsh_mean_monthly_file: data/interim/hfc134a/hfc134a_gmnhsh-mean_monthly.nc + half_degree_monthly_file: data/interim/hfc134a/hfc134a_half-degree_monthly.nc + step_config_id: hfc134a name: CI plot_input_data_overviews: - step_config_id: only @@ -75,6 +191,18 @@ process_noaa_hats_data: - gas: n2o processed_monthly_data_with_loc_file: data/interim/noaa/monthly_n2o_hats.csv step_config_id: n2o +- gas: sf6 + processed_monthly_data_with_loc_file: data/interim/noaa/monthly_sf6_hats.csv + 
step_config_id: sf6 +- gas: cfc11 + processed_monthly_data_with_loc_file: data/interim/noaa/monthly_cfc11_hats.csv + step_config_id: cfc11 +- gas: cfc12 + processed_monthly_data_with_loc_file: data/interim/noaa/monthly_cfc12_hats.csv + step_config_id: cfc12 +- gas: hfc134a + processed_monthly_data_with_loc_file: data/interim/noaa/monthly_hfc134a_hats.csv + step_config_id: hfc134a process_noaa_in_situ_data: - gas: co2 processed_monthly_data_with_loc_file: data/interim/noaa/monthly_co2_in-situ.csv @@ -89,9 +217,6 @@ process_noaa_surface_flask_data: - gas: ch4 processed_monthly_data_with_loc_file: data/interim/noaa/monthly_ch4_surface-flask.csv step_config_id: ch4 -- gas: n2o - processed_monthly_data_with_loc_file: data/interim/noaa/monthly_n2o_surface-flask.csv - step_config_id: n2o retrieve_and_extract_agage_data: - download_complete_file: data/raw/agage/agage/ch4_gc-md_monthly.complete download_urls: @@ -131,6 +256,192 @@ retrieve_and_extract_agage_data: raw_dir: data/raw/agage/agage step_config_id: n2o_gc-md_monthly time_frequency: monthly +- download_complete_file: data/raw/agage/agage/sf6_gc-md_monthly.complete + download_urls: + - known_hash: 9ec255bbd55447d8ac521a4683951e0b0aa682d8252a1072e5fa555b90af5aa1 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/capegrim-sf6-ecd/ascii/AGAGE-GC-ECD-SF6_CGO_sf6_mon.txt + gas: sf6 + generate_hashes: false + instrument: gc-md + processed_monthly_data_with_loc_file: data/interim/agage/agage/sf6_gc-md_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: sf6_gc-md_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/sf6_gc-ms-medusa_monthly.complete + download_urls: + - known_hash: 1611fb6ed6087b506af19ca8a08cdf750e2c185b136b2460ba013ace39b47714 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/barbados/ascii/AGAGE-GCMS-Medusa_RPB_sf6_mon.txt + - known_hash: d387ab096cc53fae4efa5026eaaba4f3df0ceec7a1afcfc1128687372e6505d3 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/capegrim/ascii/AGAGE-GCMS-Medusa_CGO_sf6_mon.txt + - known_hash: b556daf22abd2edf0874b875a60bd02f246315c4c5a9748273362cb210c7e077 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_sf6_mon.txt + - known_hash: 030a37af25e2d806d8ac65be66f264f449923a1b8b077c92c647577d4efe3720 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_sf6_mon.txt + - known_hash: 42d1f57226972df7d16786310e95288db0b604033d8fac82b8b03630d36d908a + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/macehead/ascii/AGAGE-GCMS-Medusa_MHD_sf6_mon.txt + - known_hash: cb05383d875d6020a0017942551f909954354ed2701a12754da6bdb80e45612f + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_sf6_mon.txt + - known_hash: 95fa2ccc93fe2dfefcb64d1405e81e0bc556a1892e5b4818312003de92eaf23b + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/samoa/ascii/AGAGE-GCMS-Medusa_SMO_sf6_mon.txt + - known_hash: 12270c0ab91decaaffc0f47710c27de9147a125938c3e98b1c10a98a2922f441 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_sf6_mon.txt + - known_hash: da146fc89d0e2b2e87846c6e5fc5712820a5f7bff69f054d90ac0ce80a1cf2a7 + url: 
https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/trinidad/ascii/AGAGE-GCMS-Medusa_THD_sf6_mon.txt + - known_hash: 2de77c7f417510878c18d4ea452815ac4555ae17719e6786548b063ee471e5bf + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_sf6_mon.txt + gas: sf6 + generate_hashes: false + instrument: gc-ms-medusa + processed_monthly_data_with_loc_file: data/interim/agage/agage/sf6_gc-ms-medusa_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: sf6_gc-ms-medusa_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/cfc11_gc-md_monthly.complete + download_urls: + - known_hash: 7f52297786487e9ede8a785c89b1d793250c4198223bafaa265cdcc268bbc978 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/barbados/ascii/AGAGE-GCMD_RPB_cfc-11_mon.txt + - known_hash: ebc838159a6ccc0bb98ac16203e927abea3371aa8aea801db572c816761074cd + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/capegrim/ascii/AGAGE-GCMD_CGO_cfc-11_mon.txt + - known_hash: aa6ef6875861e0d127fadc4697467b08ba8272d0fd3be91dd63713490de86645 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/macehead/ascii/AGAGE-GCMD_MHD_cfc-11_mon.txt + - known_hash: d1722aa15bf3a77415b97f0f9a1c1e58912e0ace054b00c8767e75666f877318 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/samoa/ascii/AGAGE-GCMD_SMO_cfc-11_mon.txt + - known_hash: 9929b38d196ef1397615b51988631f1e790797d86c260f57b387074cb667ef56 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/trinidad/ascii/AGAGE-GCMD_THD_cfc-11_mon.txt + gas: cfc11 + generate_hashes: false + instrument: gc-md + processed_monthly_data_with_loc_file: data/interim/agage/agage/cfc11_gc-md_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: cfc11_gc-md_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/cfc11_gc-ms-medusa_monthly.complete + download_urls: + - known_hash: 6d8b653daf80d3fd8c295c91e8842e8c81242c36a543da88b19964c1de7ef7ad + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_cfc-11_mon.txt + - known_hash: 1f2dd4650b49c7ee5d9e5a763e1be3daeb72f0243ea249ab3b9c9d586e71f8c4 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_cfc-11_mon.txt + - known_hash: 35231a8b2a6776e815c843ad7ba0f99378733dbbaa6c112f33d30d0558e66ad8 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_cfc-11_mon.txt + - known_hash: 33fbb30c985d2ae36a48f5a7e6e92e66ea84c83004db006ca2fb1de72f922112 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_cfc-11_mon.txt + - known_hash: f1eb53ecfa4294ef3581b536804fccac36519dbc4ddafa856c4c4aeb9e7aa048 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_cfc-11_mon.txt + gas: cfc11 + generate_hashes: false + instrument: gc-ms-medusa + processed_monthly_data_with_loc_file: data/interim/agage/agage/cfc11_gc-ms-medusa_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: cfc11_gc-ms-medusa_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/cfc11_gc-ms_monthly.complete + download_urls: + - known_hash: 3394d57cc17222ccc5de8f98edbe26131afc63e718ff9d4563727a098704aa93 + url: 
https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/mtecimone/ascii/AGAGE-GCMS-MteCimone_CMN_cfc-11_mon.txt + gas: cfc11 + generate_hashes: false + instrument: gc-ms + processed_monthly_data_with_loc_file: data/interim/agage/agage/cfc11_gc-ms_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: cfc11_gc-ms_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/cfc12_gc-md_monthly.complete + download_urls: + - known_hash: 504f4d3db1bea293392d3efaf151bfad7e5f2277224cc765877e00e69fd02be0 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/barbados/ascii/AGAGE-GCMD_RPB_cfc-12_mon.txt + - known_hash: 01b70bb4ba8abd98c02f07259c27ce8a105d4208e7bf423f406e4b8da6b480f3 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/capegrim/ascii/AGAGE-GCMD_CGO_cfc-12_mon.txt + - known_hash: 400e82052314190309650b596a1e2e8bb6ed06e1bff67d831df8b5e1d5738d1b + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/macehead/ascii/AGAGE-GCMD_MHD_cfc-12_mon.txt + - known_hash: 28823f27e07f83db07b2507f038cf046bf51697bca8cc19e7b50414e1aea2775 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/samoa/ascii/AGAGE-GCMD_SMO_cfc-12_mon.txt + - known_hash: 737e826ab87381fd98578363c5bd040d92c33356ede9af3aae802824a47887c9 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/trinidad/ascii/AGAGE-GCMD_THD_cfc-12_mon.txt + gas: cfc12 + generate_hashes: false + instrument: gc-md + processed_monthly_data_with_loc_file: data/interim/agage/agage/cfc12_gc-md_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: cfc12_gc-md_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/cfc12_gc-ms-medusa_monthly.complete + download_urls: + - known_hash: 50344532103b093e6442f8f329a4bdb41c578a72f1be77836bb3170b20abab57 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_cfc-12_mon.txt + - known_hash: fa7ac9be9f3d650d4cbd5840b7905968f28a6a6a88a8f5e43ea06fa0f1f29ac2 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_cfc-12_mon.txt + - known_hash: f46822e1c50c6b51db99066ffa671548709d6455fcb6b3989f75643383b49bab + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_cfc-12_mon.txt + - known_hash: 5db4ff4ce96cc261c386a62d9d1b59a8458d8db2edc2db7c5b62f6e686ba2989 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_cfc-12_mon.txt + - known_hash: d018493146fcee2f9750275dd561dd74ff827c207c28435727ba1bb9164e07d2 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_cfc-12_mon.txt + gas: cfc12 + generate_hashes: false + instrument: gc-ms-medusa + processed_monthly_data_with_loc_file: data/interim/agage/agage/cfc12_gc-ms-medusa_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: cfc12_gc-ms-medusa_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/cfc12_gc-ms_monthly.complete + download_urls: + - known_hash: 0325346db119008b4e8a7dd7b9641b8eb097b6b03e69e6af29f240155ace3a2e + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/mtecimone/ascii/AGAGE-GCMS-MteCimone_CMN_cfc-12_mon.txt + gas: cfc12 + generate_hashes: false + instrument: gc-ms + processed_monthly_data_with_loc_file: data/interim/agage/agage/cfc12_gc-ms_monthly.csv + raw_dir: 
data/raw/agage/agage + step_config_id: cfc12_gc-ms_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/hfc134a_gc-ms-medusa_monthly.complete + download_urls: + - known_hash: d2f39aa42e4ae6182084d595bb51d7a928913819ec30dedc5fab2b09ebce50fa + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/barbados/ascii/AGAGE-GCMS-Medusa_RPB_hfc-134a_mon.txt + - known_hash: 289fd88e1f6e8aa0fce15dadc1e9c1d246108d9a9d615327c4e242dbbbf8095c + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/capegrim/ascii/AGAGE-GCMS-Medusa_CGO_hfc-134a_mon.txt + - known_hash: bfdb55db913a285192ac5cdb9456d452cf75d190f5df2d7ad58b10b91bb5211b + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_hfc-134a_mon.txt + - known_hash: 7078f3636bbd582f09f9706c4a36bd07a3ea2194800d4dc485a01ac6cefbd3be + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_hfc-134a_mon.txt + - known_hash: b30dc3b0829d52cda6554a63558f20e2e23713562e6d08cc28beeecb770b9dbe + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/macehead/ascii/AGAGE-GCMS-Medusa_MHD_hfc-134a_mon.txt + - known_hash: 67bcdb81b91af3a417cf60a7b3fad1c3feb7e6b1ba53ba509c75da9c67754d03 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_hfc-134a_mon.txt + - known_hash: 3aefaadbb40df63585397d3fe8ef4f5ce9980264bd68d2f281d7f3e40177267a + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/samoa/ascii/AGAGE-GCMS-Medusa_SMO_hfc-134a_mon.txt + - known_hash: 31c4b1d2c15cfe77869a72ee60f6e749fbb775dc5a62451637e72097c3cd0d21 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_hfc-134a_mon.txt + - known_hash: b61be1912adf36961a37127f78daa42f903b044e796493c7a46ae180c036aa72 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/trinidad/ascii/AGAGE-GCMS-Medusa_THD_hfc-134a_mon.txt + - known_hash: 724b699558e9c26cccaff47ef18ce73812cc1a0b25fdb3a2e93d01893f0c564d + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_hfc-134a_mon.txt + gas: hfc134a + generate_hashes: false + instrument: gc-ms-medusa + processed_monthly_data_with_loc_file: data/interim/agage/agage/hfc134a_gc-ms-medusa_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: hfc134a_gc-ms-medusa_monthly + time_frequency: monthly +- download_complete_file: data/raw/agage/agage/hfc134a_gc-ms_monthly.complete + download_urls: + - known_hash: 8c6405431ea194670ac50816bf7a01556bf240d95110098c6f8203a27c73eefd + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/capegrim/ascii/AGAGE-GCMS-ADS_CGO_hfc-134a_mon.txt + - known_hash: 7b0a36cc62629440de2d4ed11c4a52744d38bdac0dc096ee35144bced8ebb032 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/jungfraujoch/ascii/AGAGE-GCMS-ADS_JFJ_hfc-134a_mon.txt + - known_hash: 8ffe2a817fe60d8427a88fdad07937d1103d70cf7780cfac409fe20676bdc4b4 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/macehead/ascii/AGAGE-GCMS-ADS_MHD_hfc-134a_mon.txt + - known_hash: e69c58e0a4f96d9782645d837e8189582e8471ff4ad22b3f9230bd038ccb4600 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/mtecimone/ascii/AGAGE-GCMS-MteCimone_CMN_hfc-134a_mon.txt + - known_hash: 
2cce94a135d6a8f48dcfd487e9196265c8f290d1f29a0b98ef5020607d615516 + url: https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/zeppelin/ascii/AGAGE-GCMS-ADS_ZEP_hfc-134a_mon.txt + gas: hfc134a + generate_hashes: false + instrument: gc-ms + processed_monthly_data_with_loc_file: data/interim/agage/agage/hfc134a_gc-ms_monthly.csv + raw_dir: data/raw/agage/agage + step_config_id: hfc134a_gc-ms_monthly + time_frequency: monthly retrieve_and_extract_ale_data: - download_complete_file: data/raw/agage/ale/ale_monthly.complete download_urls: @@ -166,7 +477,7 @@ retrieve_and_extract_gage_data: retrieve_and_extract_noaa_data: - download_complete_file: data/raw/noaa/co2_in-situ.complete download_urls: - - known_hash: 0a68c9716bb9ec29e23966a2394e312618ed9b822885876d1ce5517bdf70acbe + - known_hash: f06a7eb8f8e56f775e4843b889ba4388ad61557c95b0d8a764522893f2d90bc1 url: https://gml.noaa.gov/aftp/data/greenhouse_gases/co2/in-situ/surface/co2_surface-insitu_ccgg_text.zip gas: co2 interim_files: @@ -187,7 +498,7 @@ retrieve_and_extract_noaa_data: step_config_id: co2_surface-flask - download_complete_file: data/raw/noaa/ch4_in-situ.complete download_urls: - - known_hash: c8ad74288d860c63b6a027df4d7bf6742e772fc4e3f99a4052607a382d7fefb2 + - known_hash: 0ac02608ec33f6057e496463e2cf755970d40f4e653a6a5a7d6d14ec519b863e url: https://gml.noaa.gov/aftp/data/greenhouse_gases/ch4/in-situ/surface/ch4_surface-insitu_ccgg_text.zip gas: ch4 interim_files: @@ -216,17 +527,46 @@ retrieve_and_extract_noaa_data: raw_dir: data/raw/noaa source: hats step_config_id: n2o_hats -- download_complete_file: data/raw/noaa/n2o_surface-flask.complete +- download_complete_file: data/raw/noaa/sf6_hats.complete download_urls: - - known_hash: 6b7e09c37b7fa456ab170a4c7b825b3d4b9f6eafb0ff61a2a46554b0e63e84b1 - url: https://gml.noaa.gov/aftp/data/trace_gases/n2o/flask/surface/n2o_surface-flask_ccgg_text.zip - gas: n2o + - known_hash: 822543e2558e9e22e943478d37dffe0c758091c35d1ff9bf2b2697507dd3b39d + url: https://gml.noaa.gov/aftp/data/hats/sf6/combined/GML_global_SF6.txt + gas: sf6 interim_files: - events_data: data/interim/noaa/events_n2o_surface-flask_raw-consolidated.csv - monthly_data: data/interim/noaa/monthly_n2o_surface-flask_raw-consolidated.csv + monthly_data: data/interim/noaa/monthly_sf6_hats_raw-consolidated.csv raw_dir: data/raw/noaa - source: surface-flask - step_config_id: n2o_surface-flask + source: hats + step_config_id: sf6_hats +- download_complete_file: data/raw/noaa/cfc11_hats.complete + download_urls: + - known_hash: c6067e98bf3896a45e21a248155bbf07815facce2c428bf015560602f31661f9 + url: https://gml.noaa.gov/aftp/data/hats/cfcs/cfc11/combined/HATS_global_F11.txt + gas: cfc11 + interim_files: + monthly_data: data/interim/noaa/monthly_cfc11_hats_raw-consolidated.csv + raw_dir: data/raw/noaa + source: hats + step_config_id: cfc11_hats +- download_complete_file: data/raw/noaa/cfc12_hats.complete + download_urls: + - known_hash: 2537e02a6c4fc880c15db6ddf7ff0037add7e3f55fb227523e24ca16363128e0 + url: https://gml.noaa.gov/aftp/data/hats/cfcs/cfc12/combined/HATS_global_F12.txt + gas: cfc12 + interim_files: + monthly_data: data/interim/noaa/monthly_cfc12_hats_raw-consolidated.csv + raw_dir: data/raw/noaa + source: hats + step_config_id: cfc12_hats +- download_complete_file: data/raw/noaa/hfc134a_hats.complete + download_urls: + - known_hash: b4d7c2b760d13e2fe9f720b063dfec2b00f6ece65094d4a2e970bd53280a55a5 + url: https://gml.noaa.gov/aftp/data/hats/hfcs/hfc134a_GCMS_flask.txt + gas: hfc134a + interim_files: + monthly_data: 
data/interim/noaa/monthly_hfc134a_hats_raw-consolidated.csv + raw_dir: data/raw/noaa + source: hats + step_config_id: hfc134a_hats retrieve_and_process_epica_data: - download_url: known_hash: 26c9259d69bfe390f521d1f651de8ea37ece5bbb95b43df749ba4e00f763e9fd @@ -252,7 +592,7 @@ retrieve_and_process_scripps_data: [] retrieve_misc_data: - hadcrut5: download_url: - known_hash: 1063dbe93d7b486464c4c3b06466cd2a4a836638249c1552619a40cda2d78298 + known_hash: c1e6b0b6b372a428adea4fac109eca0278acf857ace4da0f43221fd0379ea353 url: https://www.metoffice.gov.uk/hadobs/hadcrut5/data/HadCRUT.5.0.2.0/analysis/diagnostics/HadCRUT.5.0.2.0.analysis.summary_series.global.annual.nc raw_dir: data/raw/hadcrut5 natural_earth: @@ -351,3 +691,27 @@ write_input4mips: input4mips_out_dir: data/processed/esgf-ready start_year: 1 step_config_id: n2o +- complete_file: data/processed/esgf-ready/sf6_input4MIPs_esgf-ready.complete + end_year: 2022 + gas: sf6 + input4mips_out_dir: data/processed/esgf-ready + start_year: 1 + step_config_id: sf6 +- complete_file: data/processed/esgf-ready/cfc11_input4MIPs_esgf-ready.complete + end_year: 2022 + gas: cfc11 + input4mips_out_dir: data/processed/esgf-ready + start_year: 1 + step_config_id: cfc11 +- complete_file: data/processed/esgf-ready/cfc12_input4MIPs_esgf-ready.complete + end_year: 2022 + gas: cfc12 + input4mips_out_dir: data/processed/esgf-ready + start_year: 1 + step_config_id: cfc12 +- complete_file: data/processed/esgf-ready/hfc134a_input4MIPs_esgf-ready.complete + end_year: 2022 + gas: hfc134a + input4mips_out_dir: data/processed/esgf-ready + start_year: 1 + step_config_id: hfc134a diff --git a/dodo.py b/dodo.py index 44cb5489..eaf25fa3 100644 --- a/dodo.py +++ b/dodo.py @@ -22,6 +22,7 @@ from __future__ import annotations import datetime as dt +import logging import os import time from collections.abc import Iterable @@ -57,7 +58,11 @@ See https://pydoit.org/configuration.html#configuration-at-dodo-py """ -logger = setup_logging() +logger = setup_logging( + stdout_level=logging.WARNING, + log_file=os.environ.get("DOIT_LOG_FILE", f"doit_{RUN_ID}.log"), + file_level=logging.INFO, +) def print_key_info() -> None: diff --git a/notebooks-archive/01yy_process-data/0111_process-gggrn-global-mean.py b/notebooks-archive/01yy_process-data/0111_process-gggrn-global-mean.py index 77bd6d42..0ce1ecbd 100644 --- a/notebooks-archive/01yy_process-data/0111_process-gggrn-global-mean.py +++ b/notebooks-archive/01yy_process-data/0111_process-gggrn-global-mean.py @@ -82,7 +82,7 @@ raw = pd.read_csv( config_retrieve.gggrn.raw_dir / filename, skiprows=skiprows, - delim_whitespace=True, + sep=r"\s+", ) unit = gas_units[gas] diff --git a/notebooks/000y_retrieve-misc-data/0001_natural-earth-shape-files.py b/notebooks/000y_retrieve-misc-data/0001_natural-earth-shape-files.py index 5f5181b6..92b87c8a 100644 --- a/notebooks/000y_retrieve-misc-data/0001_natural-earth-shape-files.py +++ b/notebooks/000y_retrieve-misc-data/0001_natural-earth-shape-files.py @@ -5,7 +5,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.15.2 +# jupytext_version: 1.16.1 # kernelspec: # display_name: Python 3 (ipykernel) # language: python diff --git a/notebooks/001y_process-noaa-data/0010_download.py b/notebooks/001y_process-noaa-data/0010_download.py index cbbb78c0..97611355 100644 --- a/notebooks/001y_process-noaa-data/0010_download.py +++ b/notebooks/001y_process-noaa-data/0010_download.py @@ -18,12 +18,6 @@ # Download data from the [NOAA Global Monitoring Laboratory (GML) Carbon 
Cycle Greenhouse Gases (CCGG) research area](https://gml.noaa.gov/ccgg/flask.html), specifically the [data page](https://gml.noaa.gov/ccgg/data/).
 #
 # For simplicity, here we just refer to this as the NOAA network. This is sort of in line with what is done in [Forster et al., 2023](https://essd.copernicus.org/articles/15/2295/2023/essd-15-2295-2023.pdf), who call it the "NOAA Global Monitoring Laboratory (GML)" (which appears to be the name of the top-level program). Puzzlingly, this network seems to also be referred to as the [Global Greenhouse Gas Reference Network (GGGRN)](https://gml.noaa.gov/ccgg/data/) (TODO: ask someone who knows what the distinction between the acronyms is meant to be).
-#
-# To-do:
-#
-# - read old global-mean processing (also called 0010 but in a different folder) and extract any insights from there
-# - add in handling of in situ measurements too (in situ and flask measurements treated as different stations in M17)
-# - parameterise notebook so we can do same for CH4, N2O and SF6 observations

 # %% [markdown]
 # ## Imports
@@ -51,7 +45,7 @@
 # %% editable=true slideshow={"slide_type": ""} tags=["parameters"]
 config_file: str = "../../dev-config-absolute.yaml" # config file
-step_config_id: str = "n2o_hats" # config ID to select for this branch
+step_config_id: str = "co2_in-situ" # config ID to select for this branch

 # %% [markdown]
 # ## Load config
diff --git a/notebooks/001y_process-noaa-data/0011_extract.py b/notebooks/001y_process-noaa-data/0011_extract.py
index aecb0051..3e6786b7 100644
--- a/notebooks/001y_process-noaa-data/0011_extract.py
+++ b/notebooks/001y_process-noaa-data/0011_extract.py
@@ -29,6 +29,7 @@
 from local.noaa_processing import (
     read_noaa_flask_zip,
     read_noaa_hats,
+    read_noaa_hats_combined,
     read_noaa_in_situ_zip,
 )
@@ -46,7 +47,7 @@
 # %% editable=true slideshow={"slide_type": ""} tags=["parameters"]
 config_file: str = "../../dev-config-absolute.yaml" # config file
-step_config_id: str = "n2o_hats" # config ID to select for this branch
+step_config_id: str = "co2_in-situ" # config ID to select for this branch

 # %% [markdown]
 # ## Load config
@@ -87,7 +88,13 @@
     print(df_months)

 elif config_step.source == "hats":
-    df_months = read_noaa_hats(zf, gas=config_step.gas, source=config_step.source)
+    if config_step.gas in ("n2o", "sf6", "cfc11", "cfc12"):
+        df_months = read_noaa_hats_combined(
+            zf, gas=config_step.gas, source=config_step.source
+        )
+
+    else:
+        df_months = read_noaa_hats(zf, gas=config_step.gas, source=config_step.source)

     print("df_months")
     print(df_months)
diff --git a/notebooks/001y_process-noaa-data/0012_process_surface-flask.py b/notebooks/001y_process-noaa-data/0012_process_surface-flask.py
index c6bc53c5..40a2c4b1 100644
--- a/notebooks/001y_process-noaa-data/0012_process_surface-flask.py
+++ b/notebooks/001y_process-noaa-data/0012_process_surface-flask.py
@@ -47,7 +47,7 @@
 # %% editable=true slideshow={"slide_type": ""} tags=["parameters"]
 config_file: str = "../../dev-config-absolute.yaml" # config file
-step_config_id: str = "n2o" # config ID to select for this branch
+step_config_id: str = "sf6" # config ID to select for this branch

 # %% [markdown]
 # ## Load config
diff --git a/notebooks/001y_process-noaa-data/0013_process_in-situ.py b/notebooks/001y_process-noaa-data/0013_process_in-situ.py
index 6a946dd0..01d07241 100644
--- a/notebooks/001y_process-noaa-data/0013_process_in-situ.py
+++ b/notebooks/001y_process-noaa-data/0013_process_in-situ.py
@@ -85,13 +85,48 @@
 #
 # Nice and easy as this data already has everything we need.
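For context on the deduplication hunk just below: duplicate (gas, year, month, station) rows are collapsed by averaging their values. A minimal standalone sketch of that pattern with toy data (the column names mirror the notebook's, but the frame itself is invented):

```python
import pandas as pd

# Toy frame with one duplicated (year, month, site) combination,
# mimicking the MKO July-2023 edge case handled in the hunk below.
df = pd.DataFrame(
    {
        "year": [2023, 2023, 2023],
        "month": [7, 7, 8],
        "site_code_filename": ["mko", "mko", "mko"],
        "value": [420.0, 422.0, 421.0],
    }
)

# Group on every column except "value" and take the mean,
# so duplicate rows collapse to a single averaged row.
deduplicated = (
    df.groupby([c for c in df.columns if c != "value"])
    .mean()
    .reset_index()
)
print(deduplicated)
# The two July rows become one row with value 421.0; the August row is untouched.
```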
-# %% editable=true slideshow={"slide_type": ""}
+# %%
 monthly_dfs_with_loc = df_months[PROCESSED_DATA_COLUMNS]
+
+# %%
+if config_step.step_config_id in ["co2", "ch4"]:
+    # There is one month where there is duplicate data for MKO,
+    # presumably from moving because of the fires.
+    # We deal with this here because it is such an edge case.
+    edge_case_year_month = (2023, 7)
+    edge_case_rows_select = (
+        (monthly_dfs_with_loc["year"] == edge_case_year_month[0])
+        & (monthly_dfs_with_loc["month"] == edge_case_year_month[1])
+        & (monthly_dfs_with_loc["site_code_filename"] == "mko")
+    )
+    edge_case_rows = monthly_dfs_with_loc[edge_case_rows_select]
+    exp_n_edge_case_rows = 2
+    assert edge_case_rows.shape[0] == exp_n_edge_case_rows
+
+    # Assume that a mean is fine; it seems justifiable given the overall noise
+    # and it is not clear what else to do...
+    edge_case_row_new = (
+        edge_case_rows.groupby(list(set(edge_case_rows.columns) - {"value"}))
+        .mean()
+        .reset_index()
+    )
+
+    monthly_dfs_with_loc = pd.concat(
+        [monthly_dfs_with_loc[~edge_case_rows_select], edge_case_row_new]
+    )
+    monthly_dfs_with_loc[
+        (monthly_dfs_with_loc["year"] == edge_case_year_month[0])
+        & (monthly_dfs_with_loc["month"] == edge_case_year_month[1])
+        & (monthly_dfs_with_loc["site_code_filename"] == "mko")
+    ]
+
+# %% editable=true slideshow={"slide_type": ""}
+duplicate_entries = monthly_dfs_with_loc[
+    ["gas", "year", "month", "site_code_filename"]
+][monthly_dfs_with_loc[["gas", "year", "month", "site_code_filename"]].duplicated()]
 assert (
-    not monthly_dfs_with_loc[["gas", "year", "month", "site_code_filename"]]
-    .duplicated()
-    .any()
-), "Duplicate entries for a station in a month"
+    duplicate_entries.shape[0] == 0
+), f"Duplicate entries for a station in a month {duplicate_entries}"
 monthly_dfs_with_loc

 # %% editable=true slideshow={"slide_type": ""}
diff --git a/notebooks/001y_process-noaa-data/0014_process_hats.py b/notebooks/001y_process-noaa-data/0014_process_hats.py
index ee3107b7..89a87b0a 100644
--- a/notebooks/001y_process-noaa-data/0014_process_hats.py
+++ b/notebooks/001y_process-noaa-data/0014_process_hats.py
@@ -46,7 +46,7 @@
 # %% editable=true slideshow={"slide_type": ""} tags=["parameters"]
 config_file: str = "../../dev-config-absolute.yaml" # config file
-step_config_id: str = "n2o" # config ID to select for this branch
+step_config_id: str = "hfc134a" # config ID to select for this branch

 # %% [markdown]
 # ## Load config
@@ -104,7 +104,7 @@
 # countries.columns.tolist()

 # %%
-colours = (
+colours = tuple(
     c
     for c in [
         "tab:blue",
@@ -121,7 +121,7 @@
         "tab:cyan",
     ]
 )
-markers = (
+markers = tuple(
     m
     for m in [
         "o",
@@ -142,14 +142,14 @@
     ]
 )

-for station, station_df in tqdman.tqdm(
-    monthly_dfs_with_loc.groupby("site_code"), desc="Stations"
+for i, (station, station_df) in tqdman.tqdm(
+    enumerate(monthly_dfs_with_loc.groupby("site_code")), desc="Stations"
 ):
     print(station_df)
     fig, axes = plt.subplots(ncols=2, figsize=(12, 4))

-    colour = next(colours)
-    marker = next(markers)
+    colour = colours[i % len(colours)]
+    marker = markers[i % len(markers)]

     countries.plot(color="lightgray", ax=axes[0])
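As a side note on the styling pattern above: indexing with `i % len(colours)` and `i % len(markers)` cycles each style sequence independently of its length. If only the cycling behaviour matters, `itertools.cycle` expresses the same idea without manual indexing; a minimal sketch with invented station codes:

```python
import itertools

colours = ("tab:blue", "tab:orange", "tab:green")
markers = ("o", "x", "v")

stations = ["ALT", "BRW", "CGO", "MHD", "SMO"]  # invented station codes

# zip with itertools.cycle restarts each style sequence when it runs out,
# mirroring the modulo indexing used in the notebook.
for station, colour, marker in zip(
    stations, itertools.cycle(colours), itertools.cycle(markers)
):
    print(station, colour, marker)
```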
@@ -189,7 +189,7 @@
 # %%
 fig, axes = plt.subplots(ncols=2, figsize=(12, 4))

-colours = (
+colours = tuple(
     c
     for c in [
         "tab:blue",
@@ -206,7 +206,7 @@
         "tab:cyan",
     ]
 )
-markers = (
+markers = tuple(
     m
     for m in [
         "o",
@@ -229,11 +229,11 @@
 countries.plot(color="lightgray", ax=axes[0])

-for station, station_df in tqdman.tqdm(
-    monthly_dfs_with_loc.groupby("site_code"), desc="Stations"
+for i, (station, station_df) in tqdman.tqdm(
+    enumerate(monthly_dfs_with_loc.groupby("site_code")), desc="Stations"
 ):
-    colour = next(colours)
-    marker = next(markers)
+    colour = colours[i % len(colours)]
+    marker = markers[i % len(markers)]

     station_df[["longitude", "latitude"]].drop_duplicates().plot(
         x="longitude",
diff --git a/notebooks/001y_process-noaa-data/0019_noaa-network-overview.py b/notebooks/001y_process-noaa-data/0019_noaa-network-overview.py
index 4df3443d..e7aa11d2 100644
--- a/notebooks/001y_process-noaa-data/0019_noaa-network-overview.py
+++ b/notebooks/001y_process-noaa-data/0019_noaa-network-overview.py
@@ -58,22 +58,23 @@
 if config.ci:
     to_show: tuple[tuple[str, str, str], ...] = (
-        # ("co2", "in-situ", "process_noaa_in_situ_data"),
-        # ("co2", "surface-flask", "process_noaa_surface_flask_data"),
+        ("co2", "in-situ", "process_noaa_in_situ_data"),
+        ("co2", "surface-flask", "process_noaa_surface_flask_data"),
         ("ch4", "in-situ", "process_noaa_in_situ_data"),
         ("ch4", "surface-flask", "process_noaa_surface_flask_data"),
-        ("n2o", "surface-flask", "process_noaa_surface_flask_data"),
         ("n2o", "hats", "process_noaa_hats_data"),
+        ("sf6", "hats", "process_noaa_hats_data"),
+        ("cfc11", "hats", "process_noaa_hats_data"),
     )

 else:
     to_show = (
-        # ("co2", "in-situ", "process_noaa_in_situ_data"),
-        # ("co2", "surface-flask", "process_noaa_surface_flask_data"),
+        ("co2", "in-situ", "process_noaa_in_situ_data"),
+        ("co2", "surface-flask", "process_noaa_surface_flask_data"),
         ("ch4", "in-situ", "process_noaa_in_situ_data"),
         ("ch4", "surface-flask", "process_noaa_surface_flask_data"),
-        ("n2o", "surface-flask", "process_noaa_surface_flask_data"),
         ("n2o", "hats", "process_noaa_hats_data"),
-        # ("sf6", "surface-flask", "process_noaa_surface_flask_data"),
+        ("sf6", "hats", "process_noaa_hats_data"),
+        ("cfc11", "hats", "process_noaa_hats_data"),
     )

 gas_configs = {
diff --git a/notebooks/002y_process-agage-data/0020_download-agage.py b/notebooks/002y_process-agage-data/0020_download-agage.py
index d0c1c5b7..8ff87c28 100644
--- a/notebooks/002y_process-agage-data/0020_download-agage.py
+++ b/notebooks/002y_process-agage-data/0020_download-agage.py
@@ -52,7 +52,7 @@
 # %% editable=true slideshow={"slide_type": ""} tags=["parameters"]
 config_file: str = "../../dev-config-absolute.yaml" # config file
-step_config_id: str = "ccl4_gc-md_monthly" # config ID to select for this branch
+step_config_id: str = "sf6_gc-ms-medusa_monthly" # config ID to select for this branch

 # %% [markdown]
 # ## Load config
diff --git a/notebooks/002y_process-agage-data/0023_extract-agage.py b/notebooks/002y_process-agage-data/0023_extract-agage.py
index b9e4450a..e9806bfd 100644
--- a/notebooks/002y_process-agage-data/0023_extract-agage.py
+++ b/notebooks/002y_process-agage-data/0023_extract-agage.py
@@ -50,7 +50,7 @@
 # %% editable=true slideshow={"slide_type": ""} tags=["parameters"]
 config_file: str = "../../dev-config-absolute.yaml" # config file
-step_config_id: str = "n2o_gc-md_monthly" # config ID to select for this branch
+step_config_id: str = "cfc12_gc-md_monthly" # config ID to select for this branch

 # %% [markdown]
 # ## Load config
@@ -77,17 +77,28 @@
 else:
     raise NotImplementedError(config_step.time_frequency)

+# %%
+AGAGE_GAS_MAPPING = {"cfc11": "cfc-11", "cfc12": "cfc-12", "hfc134a": "hfc-134a"}
+AGAGE_GAS_MAPPING_REVERSED = {v: k for k, v in AGAGE_GAS_MAPPING.items()}
+
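To make the mapping concrete: it translates our internal gas names into the hyphenated spellings used in AGAGE file names, and `is_relevant_file` below falls back to the internal name when a gas has no AGAGE-specific spelling. A quick illustration of that lookup-with-fallback behaviour, using only the mapping defined above:

```python
AGAGE_GAS_MAPPING = {"cfc11": "cfc-11", "cfc12": "cfc-12", "hfc134a": "hfc-134a"}

# Gases with an AGAGE-specific spelling are translated ...
assert AGAGE_GAS_MAPPING.get("cfc11", "cfc11") == "cfc-11"
# ... while gases like sf6, spelled identically in AGAGE file names, pass through unchanged.
assert AGAGE_GAS_MAPPING.get("sf6", "sf6") == "sf6"
```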
 # %%
 def is_relevant_file(f: Path) -> bool:
     """
     Check if a data file is relevant for this notebook
     """
-    if not (f.name.endswith(suffix) and f"_{config_step.gas}_" in f.name):
+    try:
+        gas_to_find = AGAGE_GAS_MAPPING[config_step.gas]
+
+    except KeyError:
+        gas_to_find = config_step.gas
+
+    if not (f.name.endswith(suffix) and f"_{gas_to_find}_" in f.name):
         return False

     if config_step.instrument == "gc-ms-medusa" and "GCMS-Medusa" not in f.name:
         return False

+    elif config_step.instrument == "gc-ms":
         if "GCMS-Medusa" in f.name:
             return False
@@ -106,6 +117,8 @@ def is_relevant_file(f: Path) -> bool:
 # %%
 relevant_files = [f for f in list(config_step.raw_dir.glob("*")) if is_relevant_file(f)]
+if not relevant_files:
+    raise AssertionError("No relevant files found")
 relevant_files

@@ -220,6 +233,24 @@ def read_agage_file(
 df_monthly = pd.concat([v[1] for v in read_info], axis=0)
 df_monthly

+# %%
+
+# %%
+for gas_file_option in df_monthly["gas"].unique():
+    if gas_file_option == config_step.gas:
+        continue
+
+    mapped_name = AGAGE_GAS_MAPPING_REVERSED[gas_file_option]
+    if mapped_name == config_step.gas:
+        print(f"Assuming {gas_file_option} is the same as {mapped_name}")
+        continue
+
+    raise NotImplementedError(gas_file_option)
+
+# Now we can rename with confidence
+df_monthly["gas"] = config_step.gas
+df_monthly
+
 # %% [markdown]
 # ### Plot
diff --git a/notebooks/010y_compile-historical-emissions/0109_compile-complete-dataset.py b/notebooks/010y_compile-historical-emissions/0109_compile-complete-dataset.py
new file mode 100644
index 00000000..1e64c791
--- /dev/null
+++ b/notebooks/010y_compile-historical-emissions/0109_compile-complete-dataset.py
@@ -0,0 +1,120 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: percent
+#       format_version: '1.3'
+#     jupytext_version: 1.16.1
+#   kernelspec:
+#     display_name: Python 3 (ipykernel)
+#     language: python
+#     name: python3
+# ---

+# %% [markdown]
+# # Compile historical emissions - complete dataset
+#
+# Put all the data together.
+#
+# At the moment, this just uses the RCMIP data.
+# In future, it will actually compile data from previous steps.
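Before the code: the renaming convention used in this notebook keeps a variable's first token and lower-cases its last token, so an RCMIP-style name such as `Emissions|Montreal Gases|CFC|CFC11` becomes `Emissions|cfc11`. A standalone illustration of the `rename_variable` helper defined below (the example input strings follow RCMIP's naming style and are shown for orientation only):

```python
def rename_variable(v: str) -> str:
    """Keep the first token of a |-separated variable, lower-case the last."""
    toks = v.split("|")
    return "|".join([toks[0], toks[-1].lower()])

# Hypothetical RCMIP-style inputs, for illustration:
assert rename_variable("Emissions|Montreal Gases|CFC|CFC11") == "Emissions|cfc11"
assert rename_variable("Emissions|F-Gases|HFC|HFC134a") == "Emissions|hfc134a"
assert rename_variable("Emissions|F-Gases|SF6") == "Emissions|sf6"
```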
+ +# %% [markdown] +# ## Imports + +# %% + +import openscm_units +import pandas as pd +import pint +import pooch +import scmdata +from pydoit_nb.config_handling import get_config_for_step_id + +from local.config import load_config_from_file + +# %% +pint.set_application_registry(openscm_units.unit_registry) # type: ignore + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% +step: str = "compile_historical_emissions" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "only" # config ID to select for this branch + +# %% [markdown] +# ## Load config + +# %% +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + config=config, step=step, step_config_id=step_config_id +) + +# %% [markdown] +# ## Action + +# %% +rcmip_emissions_fname = pooch.retrieve( + url="https://rcmip-protocols-au.s3-ap-southeast-2.amazonaws.com/v5.1.0/rcmip-emissions-annual-means-v5-1-0.csv", + known_hash="md5:4044106f55ca65b094670e7577eaf9b3", + path=config_step.complete_historical_emissions_file.parent, + progressbar=True, +) +rcmip_emissions_fname + +# %% +rcmip_all = scmdata.ScmRun(rcmip_emissions_fname, lowercase_cols=True) +rcmip_cmip6_historical = ( + rcmip_all.filter(region="World", scenario="ssp245") + .filter(variable=["*Montreal Gases*", "*F-Gases*"]) + .resample("YS") +) +rcmip_cmip6_historical + + +# %% +def rename_variable(v: str) -> str: + """ + Re-name variable to our internal conventions + """ + toks = v.split("|") + return "|".join([toks[0], toks[-1].lower()]) + + +def fix_units(u: str) -> str: + """ + Fix units so that we can handle them + """ + return u + + +# %% +out: pd.DataFrame = rcmip_cmip6_historical.long_data(time_axis="year") # type: ignore +out["variable"] = out["variable"].apply(rename_variable) +out["unit"] = out["unit"].apply(fix_units) +out + +# %% +# Make sure all our units are understood by OpenSCM-units +for unit in out["unit"].unique(): + openscm_units.unit_registry.Quantity(1, unit) + +# %% +sorted(out["unit"].unique()) + +# %% +sorted(out["variable"].unique()) + +# %% +config_step.complete_historical_emissions_file.parent.mkdir(exist_ok=True, parents=True) +out.to_csv(config_step.complete_historical_emissions_file, index=False) +config_step.complete_historical_emissions_file diff --git a/notebooks/10yy_n2o-monthly-15-degree/1000_n2o_bin-observational-network.py b/notebooks/10yy_n2o-monthly-15-degree/1000_n2o_bin-observational-network.py index cda1acfd..8500c5c0 100644 --- a/notebooks/10yy_n2o-monthly-15-degree/1000_n2o_bin-observational-network.py +++ b/notebooks/10yy_n2o-monthly-15-degree/1000_n2o_bin-observational-network.py @@ -56,11 +56,12 @@ config=config, step=step, step_config_id=step_config_id ) -config_process_noaa_surface_flask_data = get_config_for_step_id( - config=config, - step="process_noaa_surface_flask_data", - step_config_id=config_step.gas, -) +# # Don't use surface flask network as it is already included in HATS +# config_process_noaa_surface_flask_data = get_config_for_step_id( +# config=config, +# step="process_noaa_surface_flask_data", +# step_config_id=config_step.gas, +# ) config_process_noaa_hats_data = get_config_for_step_id( config=config, step="process_noaa_hats_data", @@ -88,7 +89,6 @@ # %% all_data_l = [] for f in [ - config_process_noaa_surface_flask_data.processed_monthly_data_with_loc_file, config_process_noaa_hats_data.processed_monthly_data_with_loc_file, 
config_process_agage_data_gc_md.processed_monthly_data_with_loc_file, config_process_ale_data.processed_monthly_data_with_loc_file, diff --git a/notebooks/10yy_n2o-monthly-15-degree/1002_n2o_global-mean-latitudinal-gradient-seasonality.py b/notebooks/10yy_n2o-monthly-15-degree/1002_n2o_global-mean-latitudinal-gradient-seasonality.py index e87181ab..264978a9 100644 --- a/notebooks/10yy_n2o-monthly-15-degree/1002_n2o_global-mean-latitudinal-gradient-seasonality.py +++ b/notebooks/10yy_n2o-monthly-15-degree/1002_n2o_global-mean-latitudinal-gradient-seasonality.py @@ -191,11 +191,18 @@ # ### Seasonality # %% -seasonality, relative_seasonality = local.seasonality.calculate_seasonality( +( + seasonality, + relative_seasonality, + lon_mean_ym_monthly_anomalies, +) = local.seasonality.calculate_seasonality( lon_mean=lon_mean, global_mean=global_mean, ) +# %% +lon_mean_ym_monthly_anomalies.plot.line(hue="lat", col="year", col_wrap=3) + # %% seasonality.plot.line(hue="lat") diff --git a/notebooks/10yy_n2o-monthly-15-degree/1005_n2o_create-pieces-for-gridding.py b/notebooks/10yy_n2o-monthly-15-degree/1005_n2o_create-pieces-for-gridding.py index c217186e..e6cef3cd 100644 --- a/notebooks/10yy_n2o-monthly-15-degree/1005_n2o_create-pieces-for-gridding.py +++ b/notebooks/10yy_n2o-monthly-15-degree/1005_n2o_create-pieces-for-gridding.py @@ -116,25 +116,29 @@ # %% fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) -local.xarray_time.convert_year_month_to_time(global_annual_mean_monthly).plot( # type: ignore +local.xarray_time.convert_year_month_to_time(global_annual_mean_monthly, calendar="proleptic_gregorian").plot( # type: ignore ax=axes[0] ) -local.xarray_time.convert_year_to_time(global_annual_mean).plot.scatter( - x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[0] -) +local.xarray_time.convert_year_to_time( + global_annual_mean, calendar="proleptic_gregorian" +).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[0]) local.xarray_time.convert_year_month_to_time( # type: ignore - global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][1:10]) + global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][1:10]), + calendar="proleptic_gregorian", ).plot(ax=axes[1]) local.xarray_time.convert_year_to_time( - global_annual_mean.sel(year=global_annual_mean_monthly["year"][1:10]) + global_annual_mean.sel(year=global_annual_mean_monthly["year"][1:10]), + calendar="proleptic_gregorian", ).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[1]) local.xarray_time.convert_year_month_to_time( # type: ignore - global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][-10:]) + global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][-10:]), + calendar="proleptic_gregorian", ).plot(ax=axes[2]) local.xarray_time.convert_year_to_time( - global_annual_mean.sel(year=global_annual_mean_monthly["year"][-10:]) + global_annual_mean.sel(year=global_annual_mean_monthly["year"][-10:]), + calendar="proleptic_gregorian", ).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[2]) plt.tight_layout() @@ -150,7 +154,7 @@ np.testing.assert_allclose( seasonality_full.mean("month").data.m, 0.0, - atol=1e-10, + atol=1e-8, ) # %% @@ -167,13 +171,14 @@ # %% local.xarray_time.convert_year_month_to_time( - seasonality_full.sel(year=seasonality_full["year"][-6:]) + seasonality_full.sel(year=seasonality_full["year"][-6:]), + calendar="proleptic_gregorian", ).plot(x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, 
sharey=True) # %% -local.xarray_time.convert_year_month_to_time(seasonality_full).plot( - x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, sharey=True -) +local.xarray_time.convert_year_month_to_time( + seasonality_full, calendar="proleptic_gregorian" +).plot(x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, sharey=True) # %% [markdown] # ### Latitudinal gradient, monthly @@ -199,23 +204,25 @@ fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) -local.xarray_time.convert_year_month_to_time(pcs_monthly).plot(ax=axes[0], hue="eof") -local.xarray_time.convert_year_to_time(pcs_annual).plot.scatter( - x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[0] -) +local.xarray_time.convert_year_month_to_time( + pcs_monthly, calendar="proleptic_gregorian" +).plot(ax=axes[0], hue="eof") +local.xarray_time.convert_year_to_time( + pcs_annual, calendar="proleptic_gregorian" +).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[0]) local.xarray_time.convert_year_month_to_time( - pcs_monthly.sel(year=pcs_monthly["year"][1:10]) + pcs_monthly.sel(year=pcs_monthly["year"][1:10]), calendar="proleptic_gregorian" ).plot(ax=axes[1], hue="eof") local.xarray_time.convert_year_to_time( - pcs_annual.sel(year=pcs_monthly["year"][1:10]) + pcs_annual.sel(year=pcs_monthly["year"][1:10]), calendar="proleptic_gregorian" ).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[1]) local.xarray_time.convert_year_month_to_time( - pcs_monthly.sel(year=pcs_monthly["year"][-10:]) + pcs_monthly.sel(year=pcs_monthly["year"][-10:]), calendar="proleptic_gregorian" ).plot(ax=axes[2], hue="eof") local.xarray_time.convert_year_to_time( - pcs_annual.sel(year=pcs_monthly["year"][-10:]) + pcs_annual.sel(year=pcs_monthly["year"][-10:]), calendar="proleptic_gregorian" ).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[2]) plt.tight_layout() @@ -236,9 +243,9 @@ latitudinal_gradient_monthly # %% -local.xarray_time.convert_year_month_to_time(latitudinal_gradient_monthly).plot( - hue="lat" -) +local.xarray_time.convert_year_month_to_time( + latitudinal_gradient_monthly, calendar="proleptic_gregorian" +).plot(hue="lat") # %% [markdown] # ### Save diff --git a/notebooks/11yy_ch4-monthly-15-degree/1102_ch4_global-mean-latitudinal-gradient-seasonality.py b/notebooks/11yy_ch4-monthly-15-degree/1102_ch4_global-mean-latitudinal-gradient-seasonality.py index da008d3e..d5c7cb1d 100644 --- a/notebooks/11yy_ch4-monthly-15-degree/1102_ch4_global-mean-latitudinal-gradient-seasonality.py +++ b/notebooks/11yy_ch4-monthly-15-degree/1102_ch4_global-mean-latitudinal-gradient-seasonality.py @@ -190,11 +190,18 @@ # ### Seasonality # %% -seasonality, relative_seasonality = local.seasonality.calculate_seasonality( +( + seasonality, + relative_seasonality, + lon_mean_ym_monthly_anomalies, +) = local.seasonality.calculate_seasonality( lon_mean=lon_mean, global_mean=global_mean, ) +# %% +lon_mean_ym_monthly_anomalies.plot.line(hue="lat", col="year", col_wrap=3) + # %% seasonality.plot.line(hue="lat") diff --git a/notebooks/12yy_co2-monthly-15-degree/1202_co2_observational-network-global-mean-latitudinal-gradient-seasonality.py b/notebooks/12yy_co2-monthly-15-degree/1202_co2_observational-network-global-mean-latitudinal-gradient-seasonality.py index 8cda5dea..10801876 100644 --- a/notebooks/12yy_co2-monthly-15-degree/1202_co2_observational-network-global-mean-latitudinal-gradient-seasonality.py +++ b/notebooks/12yy_co2-monthly-15-degree/1202_co2_observational-network-global-mean-latitudinal-gradient-seasonality.py @@ -198,11 
+198,14 @@ # ### Seasonality # %% -seasonality, _ = local.seasonality.calculate_seasonality( +seasonality, _, lon_mean_ym_monthly_anomalies = local.seasonality.calculate_seasonality( lon_mean=lon_mean, global_mean=global_mean, ) +# %% +lon_mean_ym_monthly_anomalies.plot.line(hue="lat", col="year", col_wrap=3) + # %% seasonality.plot.line(hue="lat") @@ -219,81 +222,6 @@ ) seasonality_anomalies -# %% -# seasonality_anomalies_stacked = seasonality_anomalies.stack({"lat-month": ["lat", "month"]}) -# svd_ready = seasonality_anomalies_stacked.transpose("year", "lat-month").pint.dequantify() - -# U, D, Vh = np.linalg.svd( -# svd_ready, -# full_matrices=False, -# ) -# # If you take the full SVD, you get back the original matrix -# if not np.allclose( -# svd_ready, -# U @ np.diag(D) @ Vh, -# ): -# msg = "Something wrong with SVD" -# raise AssertionError(msg) - -# # Empirical orthogonal functions (each column is an EOF) -# eofs = Vh.T - -# # Principal components are the scaling factors on the EOFs -# principal_components = U @ np.diag(D) - -# # Similarly, if you use the full EOFs and principal components, -# # you get back the original matrix -# if not np.allclose( -# svd_ready, -# principal_components @ eofs.T, -# ): -# msg = "Something wrong with PC and EOF breakdown" -# raise AssertionError(msg) - -# xr_principal_components_keep = xr.DataArray( -# name="principal-components", -# data=principal_components, -# dims=["year", "eof"], -# coords=dict( -# year=svd_ready["year"], -# eof=range(principal_components.shape[1]), -# ), -# attrs=dict( -# description="Principal components for the seasonality change EOFs", -# units="dimensionless", -# ), -# ).pint.quantify() - -# xr_eofs_keep = xr.DataArray( -# name="eofs", -# data=eofs, -# dims=["lat-month", "eof"], -# coords={ -# "lat-month": svd_ready["lat-month"], -# "eof": range(principal_components.shape[1]), -# "lat": svd_ready["lat"], -# "month": svd_ready["lat"], -# }, -# attrs=dict( -# description="EOFs for the seasonality change", -# units=svd_ready.attrs["units"], -# ), -# ).pint.quantify() - -# res = xr.merge([xr_eofs_keep, xr_principal_components_keep], combine_attrs="drop_conflicts") - -# # One final check -# if not np.allclose( -# svd_ready, -# (res["principal-components"] @ res["eofs"]).pint.dequantify(), -# ): -# msg = "Something wrong with saving as xarray" -# raise AssertionError(msg) - -# # res = res.unstack("lat-month") -# seasonality_change_eof_pieces = res -# seasonality_change_eof_pieces - # %% n_eofs_to_show = 3 ( @@ -352,7 +280,7 @@ if year % 5: continue - for lat in [-37.5, 7.5, 52.5]: + for lat in [-37.5, 7.5, 52.5, 67.5]: fig, axes = plt.subplots(ncols=3, sharex=True, sharey=True) selected = seasonality_anomalies.sel(year=year, lat=lat) diff --git a/notebooks/12yy_co2-monthly-15-degree/1203_co2_extend-lat-gradient-pcs.py b/notebooks/12yy_co2-monthly-15-degree/1203_co2_extend-lat-gradient-pcs.py index 43c4f660..50d818b7 100644 --- a/notebooks/12yy_co2-monthly-15-degree/1203_co2_extend-lat-gradient-pcs.py +++ b/notebooks/12yy_co2-monthly-15-degree/1203_co2_extend-lat-gradient-pcs.py @@ -16,8 +16,7 @@ # # CO$_2$ - extend the latitudinal gradient principal components # # Extend the latitudinal gradient's principal components back in time. -# For CO$_2$, we do this by using the values from the Law Dome ice core -# and a regression against emissions. +# For CO$_2$, we do this by using a regression against emissions. 
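For orientation, a schematic of what "regression against emissions" means here, with invented numbers: fit the observed principal component as a linear function of emissions over the instrumental period, then evaluate that fit against the compiled historical emissions to fill the pre-network years. This is a conceptual sketch under those assumptions, not the notebook's actual implementation:

```python
import numpy as np

# Toy stand-ins: in the real pipeline these come from the observational
# network (the PC timeseries) and the compiled historical emissions.
years_obs = np.arange(1995, 2023)
emissions_obs = np.linspace(5.0, 9.0, years_obs.size)  # hypothetical units
pc0_obs = 0.8 * emissions_obs + 0.1                    # pretend PC0 tracks emissions

# Fit PC0 as a linear function of emissions over the observed period.
slope, intercept = np.polyfit(emissions_obs, pc0_obs, deg=1)

# Extend PC0 back in time by applying the fit to earlier emissions.
emissions_pre_obs = np.linspace(0.0, 5.0, 50)          # hypothetical pre-network emissions
pc0_extended = slope * emissions_pre_obs + intercept
```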
# %% [markdown] # ## Imports diff --git a/notebooks/12yy_co2-monthly-15-degree/1206_co2_create-pieces-for-gridding.py b/notebooks/12yy_co2-monthly-15-degree/1206_co2_create-pieces-for-gridding.py index 83db14d2..f0df0773 100644 --- a/notebooks/12yy_co2-monthly-15-degree/1206_co2_create-pieces-for-gridding.py +++ b/notebooks/12yy_co2-monthly-15-degree/1206_co2_create-pieces-for-gridding.py @@ -122,29 +122,33 @@ # %% fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) -local.xarray_time.convert_year_month_to_time(global_annual_mean_monthly).plot( # type: ignore +local.xarray_time.convert_year_month_to_time(global_annual_mean_monthly, calendar="proleptic_gregorian").plot( # type: ignore ax=axes[0] ) -local.xarray_time.convert_year_to_time(global_annual_mean).plot.scatter( - x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[0] -) +local.xarray_time.convert_year_to_time( + global_annual_mean, calendar="proleptic_gregorian" +).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[0]) local.xarray_time.convert_year_month_to_time( - global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][1:10]) + global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][1:10]), + calendar="proleptic_gregorian", ).plot( ax=axes[1] ) # type: ignore local.xarray_time.convert_year_to_time( - global_annual_mean.sel(year=global_annual_mean_monthly["year"][1:10]) + global_annual_mean.sel(year=global_annual_mean_monthly["year"][1:10]), + calendar="proleptic_gregorian", ).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[1]) local.xarray_time.convert_year_month_to_time( - global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][-10:]) + global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][-10:]), + calendar="proleptic_gregorian", ).plot( ax=axes[2] ) # type: ignore local.xarray_time.convert_year_to_time( - global_annual_mean.sel(year=global_annual_mean_monthly["year"][-10:]) + global_annual_mean.sel(year=global_annual_mean_monthly["year"][-10:]), + calendar="proleptic_gregorian", ).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[2]) plt.tight_layout() @@ -164,7 +168,7 @@ np.testing.assert_allclose( seasonality_full.mean("month").data.m, 0.0, - atol=1e-10, + atol=1e-7, ) # %% @@ -185,7 +189,7 @@ ).plot(x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, sharey=True) # %% -local.xarray_time.convert_year_month_to_time(seasonality_full).plot( # type: ignore +local.xarray_time.convert_year_month_to_time(seasonality_full, calendar="proleptic_gregorian").plot( # type: ignore x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, sharey=True ) @@ -213,23 +217,25 @@ fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) -local.xarray_time.convert_year_month_to_time(pcs_monthly).plot(ax=axes[0], hue="eof") -local.xarray_time.convert_year_to_time(pcs_annual).plot.scatter( - x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[0] -) +local.xarray_time.convert_year_month_to_time( + pcs_monthly, calendar="proleptic_gregorian" +).plot(ax=axes[0], hue="eof") +local.xarray_time.convert_year_to_time( + pcs_annual, calendar="proleptic_gregorian" +).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[0]) local.xarray_time.convert_year_month_to_time( - pcs_monthly.sel(year=pcs_monthly["year"][1:10]) + pcs_monthly.sel(year=pcs_monthly["year"][1:10]), calendar="proleptic_gregorian" ).plot(ax=axes[1], hue="eof") local.xarray_time.convert_year_to_time( - pcs_annual.sel(year=pcs_monthly["year"][1:10]) + 
pcs_annual.sel(year=pcs_monthly["year"][1:10]), calendar="proleptic_gregorian" ).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[1]) local.xarray_time.convert_year_month_to_time( - pcs_monthly.sel(year=pcs_monthly["year"][-10:]) + pcs_monthly.sel(year=pcs_monthly["year"][-10:]), calendar="proleptic_gregorian" ).plot(ax=axes[2], hue="eof") local.xarray_time.convert_year_to_time( - pcs_annual.sel(year=pcs_monthly["year"][-10:]) + pcs_annual.sel(year=pcs_monthly["year"][-10:]), calendar="proleptic_gregorian" ).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[2]) plt.tight_layout() @@ -250,9 +256,9 @@ latitudinal_gradient_monthly # %% -local.xarray_time.convert_year_month_to_time(latitudinal_gradient_monthly).plot( - hue="lat" -) +local.xarray_time.convert_year_month_to_time( + latitudinal_gradient_monthly, calendar="proleptic_gregorian" +).plot(hue="lat") # %% [markdown] # ### Save diff --git a/notebooks/13yy_sf6-like-monthly-15-degree/1300_sf6-like_bin-observational-network.py b/notebooks/13yy_sf6-like-monthly-15-degree/1300_sf6-like_bin-observational-network.py new file mode 100644 index 00000000..9fef6eaa --- /dev/null +++ b/notebooks/13yy_sf6-like-monthly-15-degree/1300_sf6-like_bin-observational-network.py @@ -0,0 +1,131 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # SF$_6$-like - binning +# +# Bin the observational network. + +# %% [markdown] +# ## Imports + +# %% +import openscm_units +import pandas as pd +import pint +from pydoit_nb.config_handling import get_config_for_step_id + +import local.binned_data_interpolation +import local.binning +import local.observational_network_binning +import local.raw_data_processing +from local.config import load_config_from_file + +# %% +pint.set_application_registry(openscm_units.unit_registry) # type: ignore + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% editable=true slideshow={"slide_type": ""} +step: str = "calculate_sf6_like_monthly_fifteen_degree_pieces" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "hfc134a" # config ID to select for this branch + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# ## Load config + +# %% editable=true slideshow={"slide_type": ""} +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + config=config, step=step, step_config_id=step_config_id +) + + +# %% [markdown] +# ## Action + +# %% [markdown] +# ### Load data + +# %% +obs_network_input_files = ( + local.observational_network_binning.get_obs_network_binning_input_files( + gas=config_step.gas, config=config + ) +) +obs_network_input_files + +# %% +all_data_l = [] +for f in obs_network_input_files: + try: + all_data_l.append(local.raw_data_processing.read_and_check_binning_columns(f)) + except Exception as exc: + msg = f"Error reading {f}" + raise ValueError(msg) from exc + +all_data = pd.concat(all_data_l) +# TODO: add check of gas names to processed data checker +all_data["gas"] = all_data["gas"].str.lower() +all_data = all_data[all_data["gas"] == config_step.gas] +all_data + +# %% [markdown] +# ## Bin and average data +# +# - all measurements from a 
station are first averaged for the month +# - then average over all stations +# - stations get equal weight +# - flask/in situ networks (i.e. different measurement methods/techniques) +# are treated as separate stations i.e. get equal weight +# - this order is best because it reduces the risk of accidentally giving some times in the month more weight than others +# - properly equally weighting all times in the month would be very hard, +# because you'd need to interpolate to a super fine grid first (a topic for future research) + + +# %% +all_data_with_bins = local.binning.add_lat_lon_bin_columns(all_data) +all_data_with_bins + +# %% +print(local.binning.get_network_summary(all_data_with_bins)) + +# %% +bin_averages = local.binning.calculate_bin_averages(all_data_with_bins) +bin_averages + +# %% [markdown] +# ### Save + +# %% +local.binned_data_interpolation.check_data_columns_for_binned_data_interpolation( + bin_averages +) +assert set(bin_averages["gas"]) == {config_step.gas} + +# %% +config_step.processed_bin_averages_file.parent.mkdir(exist_ok=True, parents=True) +bin_averages.to_csv(config_step.processed_bin_averages_file, index=False) +bin_averages + +# %% +config_step.processed_all_data_with_bins_file.parent.mkdir(exist_ok=True, parents=True) +all_data_with_bins.to_csv(config_step.processed_all_data_with_bins_file, index=False) +all_data_with_bins diff --git a/notebooks/13yy_sf6-like-monthly-15-degree/1301_sf6-like_interpolate-observational-network.py b/notebooks/13yy_sf6-like-monthly-15-degree/1301_sf6-like_interpolate-observational-network.py new file mode 100644 index 00000000..84b27c76 --- /dev/null +++ b/notebooks/13yy_sf6-like-monthly-15-degree/1301_sf6-like_interpolate-observational-network.py @@ -0,0 +1,196 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # SF$_6$-like - interpolate observational network +# +# Interpolate the observational network data onto our grid.
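For reference, the equal-weighting scheme described in the binning bullets above boils down to a two-stage groupby-mean. A minimal sketch with pandas, using invented `station` and `value` columns alongside the real data's `year`/`month` columns (the actual logic lives in `local.binning.calculate_bin_averages`):

```python
import pandas as pd

# Toy observations: two stations in the same bin, one reporting far more
# often than the other within the month.
obs = pd.DataFrame(
    {
        "station": ["a"] * 10 + ["b"],
        "year": 2020,
        "month": 1,
        "value": [400.0] * 10 + [410.0],
    }
)

# Stage 1: average each station's measurements within the month
station_monthly = (
    obs.groupby(["station", "year", "month"])["value"].mean().reset_index()
)

# Stage 2: average over stations, so each station gets equal weight
# regardless of how many raw measurements it contributed
bin_average = station_monthly.groupby(["year", "month"])["value"].mean()

# Equal station weighting gives (400 + 410) / 2 = 405,
# not the raw mean of ~400.9
print(bin_average)
```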
+ +# %% [markdown] +# ## Imports + +# %% +import cftime +import matplotlib.pyplot as plt +import numpy as np +import openscm_units +import pandas as pd +import pint +import tqdm.autonotebook as tqdman +from pydoit_nb.config_handling import get_config_for_step_id + +import local.binned_data_interpolation +import local.binning +import local.raw_data_processing +from local.config import load_config_from_file + +# %% +pint.set_application_registry(openscm_units.unit_registry) # type: ignore + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% editable=true slideshow={"slide_type": ""} +step: str = "calculate_sf6_like_monthly_fifteen_degree_pieces" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "hfc134a" # config ID to select for this branch + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# ## Load config + +# %% editable=true slideshow={"slide_type": ""} +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + config=config, step=step, step_config_id=step_config_id +) + + +# %% [markdown] +# ## Action + +# %% [markdown] +# ### Load data + +# %% +bin_averages = pd.read_csv(config_step.processed_bin_averages_file) +bin_averages + +# %% +all_data_with_bins = pd.read_csv(config_step.processed_all_data_with_bins_file) +all_data_with_bins + +# %% [markdown] +# ## Interpolate + +# %% +MIN_POINTS_FOR_SPATIAL_INTERPOLATION = 4 + +# %% +times_l = [] +interpolated_dat_l = [] +year_month_plot = ( + (1983, 1), + (1984, 1), + (1984, 3), + (2000, 4), + (2018, 1), + (2018, 2), + (2022, 12), + (2023, 1), + (2023, 12), +) + +for (year, month), ymdf in tqdman.tqdm(bin_averages.groupby(["year", "month"])): + if ymdf.shape[0] < MIN_POINTS_FOR_SPATIAL_INTERPOLATION: + msg = f"Not enough data ({ymdf.shape[0]} data points) for {year=}, {month=}, not performing spatial interpolation" + print(msg) + continue + + interpolated_ym = local.binned_data_interpolation.interpolate(ymdf) + show_plot = False + include_data = True + if np.isnan(interpolated_ym).any(): + if np.isnan(interpolated_ym.T[1:-1, :]).any(): + msg = f"Nan data after interpolation for {year=}, {month=}, not including spatial interpolation in output" + print(msg) + include_data = False + + elif not config_step.allow_poleward_extension: + msg = ( + f"Nan data after interpolation for {year=}, {month=} and no poleward extension allowed, " + "not including spatial interpolation in output" + ) + print(msg) + include_data = False + + else: + all_data_with_bins_ym = all_data_with_bins[ + (all_data_with_bins["year"] == year) + & (all_data_with_bins["month"] == month) + ] + + north_pole_has_nan = np.isnan(interpolated_ym.T[-1, :]).any() + south_pole_has_nan = np.isnan(interpolated_ym.T[0, :]).any() + + if north_pole_has_nan: + north_input = all_data_with_bins_ym[ + all_data_with_bins_ym["latitude"] + == all_data_with_bins_ym["latitude"].max() + ] + north_pole_value = north_input["value"].mean() + interpolated_ym[:, -1] = north_pole_value + + msg = f"Fixed North Pole with poleward extension of {north_pole_value}" + print(msg) + + if south_pole_has_nan: + south_input = all_data_with_bins_ym[ + all_data_with_bins_ym["latitude"] + == all_data_with_bins_ym["latitude"].min() + ] + south_pole_value = south_input["value"].mean() + interpolated_ym[:, 0] = south_pole_value + + msg = f"Fixed South Pole with poleward extension of {south_pole_value}" + print(msg) + + if 
np.isnan(interpolated_ym).any(): + msg = "Should be no more nan now" + raise AssertionError(msg) + + show_plot = True + + if include_data: + interpolated_dat_l.append(interpolated_ym) + times_l.append(cftime.datetime(year, month, 15)) + + if (year, month) in year_month_plot or show_plot: + # This will break if we ever change our internal gridding logic, ok for now. + lon_grid, lat_grid = np.meshgrid( + local.binning.LON_BIN_CENTRES, + local.binning.LAT_BIN_CENTRES, + ) + + plt.pcolormesh(lon_grid, lat_grid, interpolated_ym.T, shading="auto") + plt.plot(ymdf["lon_bin"], ymdf["lat_bin"], "ok", label="input point") + plt.legend() + plt.colorbar() + # plt.axis("equal") + plt.ylim([-90, 90]) + plt.title(f"{year} {month}") + plt.show() + +# %% +out = local.binned_data_interpolation.to_xarray_dataarray( + name=config_step.gas, + bin_averages_df=bin_averages, + data=interpolated_dat_l, + times=times_l, +) +out + +# %% [markdown] +# ### Save + +# %% +config_step.observational_network_interpolated_file.parent.mkdir( + exist_ok=True, parents=True +) +out.to_netcdf(config_step.observational_network_interpolated_file) +out diff --git a/notebooks/13yy_sf6-like-monthly-15-degree/1302_sf6-like_observational-network-global-mean-latitudinal-gradient-seasonality.py b/notebooks/13yy_sf6-like-monthly-15-degree/1302_sf6-like_observational-network-global-mean-latitudinal-gradient-seasonality.py new file mode 100644 index 00000000..6bc26c94 --- /dev/null +++ b/notebooks/13yy_sf6-like-monthly-15-degree/1302_sf6-like_observational-network-global-mean-latitudinal-gradient-seasonality.py @@ -0,0 +1,264 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # SF$_6$-like - calculate observational network global-, annual-mean, latitudinal gradient and seasonality +# +# Calculate global-mean, latitudinal gradient and seasonality +# based on the observational network. +# We also perform EOF analyses for the latitudinal gradient +# and the seasonality change.
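The EOF analyses mentioned above follow the standard SVD recipe, the same one visible in the commented-out block removed from the CO$_2$ notebook earlier in this diff: factor the anomaly matrix, read the EOFs off the right singular vectors and the principal components off the scaled left singular vectors. A self-contained sketch with synthetic data (illustrative only, not the `local.latitudinal_gradient` API):

```python
import numpy as np

rng = np.random.default_rng(0)

# Synthetic (year, lat) anomaly field, centred so each year has zero mean over lat
field = rng.normal(size=(40, 12))
field -= field.mean(axis=1, keepdims=True)

# SVD: field = U @ diag(D) @ Vh
U, D, Vh = np.linalg.svd(field, full_matrices=False)

eofs = Vh.T  # each column is an EOF (a spatial pattern)
principal_components = U @ np.diag(D)  # year-by-year scalings of the EOFs

# Keeping all modes reproduces the field exactly
np.testing.assert_allclose(field, principal_components @ eofs.T)

# Truncating to the leading mode(s) gives the low-order representation
# used for the latitudinal gradient
n_keep = 1
approx = principal_components[:, :n_keep] @ eofs.T[:n_keep, :]
```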
+ +# %% [markdown] +# ## Imports + +# %% +import cf_xarray.units +import matplotlib.pyplot as plt +import pint_xarray +import xarray as xr +from pydoit_nb.config_handling import get_config_for_step_id + +import local.binned_data_interpolation +import local.binning +import local.latitudinal_gradient +import local.mean_preserving_interpolation +import local.raw_data_processing +import local.seasonality +import local.xarray_space +import local.xarray_time +from local.config import load_config_from_file + +# %% +cf_xarray.units.units.define("ppm = 1 / 1000000") +cf_xarray.units.units.define("ppb = ppm / 1000") +cf_xarray.units.units.define("ppt = ppb / 1000") + +pint_xarray.accessors.default_registry = pint_xarray.setup_registry( + cf_xarray.units.units +) + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% editable=true slideshow={"slide_type": ""} +step: str = "calculate_sf6_like_monthly_fifteen_degree_pieces" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "hfc134a" # config ID to select for this branch + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# ## Load config + +# %% editable=true slideshow={"slide_type": ""} +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + config=config, step=step, step_config_id=step_config_id +) + + +# %% [markdown] +# ## Action + +# %% [markdown] +# ### Load data + +# %% +interpolated_spatial: xr.DataArray = xr.load_dataarray( # type: ignore + config_step.observational_network_interpolated_file +).pint.quantify() +interpolated_spatial + +# %% [markdown] +# ### Drop out any years that have nan +# +# These break everything later on + +# %% +interpolated_spatial_no_year_with_nan = local.xarray_time.convert_time_to_year_month( + interpolated_spatial +).dropna("year") +if interpolated_spatial_no_year_with_nan["year"].shape[0] != ( + interpolated_spatial_no_year_with_nan["year"].max() + - interpolated_spatial_no_year_with_nan["year"].min() + + 1 +): + msg = "Missing years, this will not end well" + raise AssertionError(msg) + +interpolated_spatial_no_year_with_nan + +# %% +interpolated_spatial_nan_free = local.xarray_time.convert_year_month_to_time( + interpolated_spatial_no_year_with_nan +) +interpolated_spatial_nan_free + +# %% [markdown] +# ### Longitudinal-mean + +# %% +lon_mean = interpolated_spatial_nan_free.mean(dim="lon") +lon_mean.plot(hue="lat") # type: ignore + +# %% [markdown] +# ### Global-, annual-mean + +# %% +global_mean = local.xarray_space.calculate_global_mean_from_lon_mean(lon_mean) +global_mean.plot() # type: ignore + +# %% +global_annual_mean = global_mean.groupby("time.year").mean() +global_annual_mean.plot() # type: ignore + +# %% [markdown] +# ### Latitudinal gradient + +# %% +( + lat_residuals_annual_mean, + lat_gradient_full_eofs_pcs, +) = local.latitudinal_gradient.calculate_eofs_pcs(lon_mean, global_mean) +lat_gradient_full_eofs_pcs + +# %% +fig, ax = plt.subplots() +n_eofs_to_show = 3 + +for i in range(n_eofs_to_show): + ax.plot( + lat_gradient_full_eofs_pcs["principal-components"].sel(eof=i).data.m, + label=f"Principal component {i}", + ) + +ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5)) + +# %% +fig, ax = plt.subplots() + +for i in range(3): + ax.plot( + lat_gradient_full_eofs_pcs["eofs"].sel(eof=i).data.m, + lat_gradient_full_eofs_pcs["lat"].data, + label=f"EOF {i}", + zorder=2 - i / 10, + ) + 
+ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5)) + +# %% +fig, ax = plt.subplots() + +for i in range(3): + ax.plot( + lat_gradient_full_eofs_pcs["principal-components"].sel(eof=i).isel(year=1) + @ lat_gradient_full_eofs_pcs["eofs"].sel(eof=i), + lat_gradient_full_eofs_pcs["lat"], + label=f"EOF {i}", + zorder=2 - i / 10, + ) + +ax.legend(loc="center left", bbox_to_anchor=(1.05, 0.5)) + +# %% +lat_gradient_eofs_pcs = lat_gradient_full_eofs_pcs.sel( + eof=range(config_step.lat_gradient_n_eofs_to_use) +) +lat_gradient_eofs_pcs + +# %% +latitudinal_anomaly_from_eofs = ( + lat_gradient_eofs_pcs["principal-components"] @ lat_gradient_eofs_pcs["eofs"] +) + +for year in latitudinal_anomaly_from_eofs["year"]: + if year % 5: + continue + + fig, axes = plt.subplots(nrows=3, sharex=True, sharey=True) + + selected = lat_residuals_annual_mean.sel(year=year) + axes[0].plot(selected.data.m, lat_residuals_annual_mean.lat) + axes[0].set_title("Full anomaly") + + selected_eofs = latitudinal_anomaly_from_eofs.sel(year=year) + axes[1].plot(selected_eofs.data.m, latitudinal_anomaly_from_eofs.lat) + axes[1].set_title("Anomaly from EOFs") + + axes[2].plot(selected.data.m - selected_eofs.data.m, lat_residuals_annual_mean.lat) + axes[2].set_title("Difference") + + axes[0].set_ylim([-90, 90]) + + plt.suptitle(str(int(year))) + plt.tight_layout() + plt.show() + +# %% [markdown] +# ### Seasonality + +# %% +( + seasonality, + relative_seasonality, + lon_mean_ym_monthly_anomalies, +) = local.seasonality.calculate_seasonality( + lon_mean=lon_mean, + global_mean=global_mean, +) + +# %% +lon_mean_ym_monthly_anomalies.plot.line(hue="lat", col="year", col_wrap=3) + +# %% +seasonality.plot.line(hue="lat") + +# %% +relative_seasonality.plot.line(hue="lat") + +# %% [markdown] +# ### Save + +# %% +config_step.observational_network_global_annual_mean_file.parent.mkdir( + exist_ok=True, parents=True +) +global_annual_mean.pint.dequantify().to_netcdf( + config_step.observational_network_global_annual_mean_file +) +global_annual_mean + +# %% +config_step.observational_network_latitudinal_gradient_eofs_file.parent.mkdir( + exist_ok=True, parents=True +) +lat_gradient_eofs_pcs.pint.dequantify().to_netcdf( + config_step.observational_network_latitudinal_gradient_eofs_file +) +lat_gradient_eofs_pcs + +# %% +# Use relative seasonality +config_step.observational_network_seasonality_file.parent.mkdir( + exist_ok=True, parents=True +) +relative_seasonality.pint.dequantify().to_netcdf( + config_step.observational_network_seasonality_file +) +relative_seasonality diff --git a/notebooks/13yy_sf6-like-monthly-15-degree/1303_sf6-like_extend-lat-gradient-pcs.py b/notebooks/13yy_sf6-like-monthly-15-degree/1303_sf6-like_extend-lat-gradient-pcs.py new file mode 100644 index 00000000..9fea7ab4 --- /dev/null +++ b/notebooks/13yy_sf6-like-monthly-15-degree/1303_sf6-like_extend-lat-gradient-pcs.py @@ -0,0 +1,390 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # SF$_6$-like - extend the latitudinal gradient principal components +# +# Extend the latitudinal gradient's principal components back in time. +# For SF$_6$-like gases, we do this by using a regression against emissions. 
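The regression used in this notebook (implemented further down with `np.linalg.lstsq`) is a least-squares fit through the origin: with a single regressor, the slope has the closed form $m = \sum_i x_i y_i / \sum_i x_i^2$. A quick check with invented numbers that the one-column design matrix reproduces it:

```python
import numpy as np

# Invented emissions (x) and PC0 (y) values
x = np.array([1.0, 2.0, 3.0, 4.0])
y = np.array([0.9, 2.1, 2.9, 4.2])

# Fit y ~ m * x with no intercept: the design matrix is a single column
A = x[:, np.newaxis]
m_lstsq = np.linalg.lstsq(A, y, rcond=None)[0][0]

# Closed-form slope for regression through the origin
m_closed = (x * y).sum() / (x * x).sum()

np.testing.assert_allclose(m_lstsq, m_closed)
print(m_lstsq)  # 1.02
```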
+ +# %% [markdown] +# ## Imports + +# %% +from collections.abc import Iterator +from contextlib import contextmanager + +import cf_xarray.units +import matplotlib.axes +import matplotlib.pyplot as plt +import numpy as np +import openscm_units +import pandas as pd +import pint +import pint_xarray +import xarray as xr +from pydoit_nb.config_handling import get_config_for_step_id + +import local.binned_data_interpolation +import local.binning +import local.config +import local.latitudinal_gradient +import local.mean_preserving_interpolation +import local.raw_data_processing +import local.regressors +import local.seasonality +import local.xarray_space +import local.xarray_time +from local.config import load_config_from_file + +# %% +cf_xarray.units.units.define("ppm = 1 / 1000000") +cf_xarray.units.units.define("ppb = ppm / 1000") +cf_xarray.units.units.define("ppt = ppb / 1000") + +pint_xarray.accessors.default_registry = pint_xarray.setup_registry( + cf_xarray.units.units +) + +Quantity = pint.get_application_registry().Quantity # type: ignore + +# %% +pint_xarray.setup_registry(openscm_units.unit_registry) + +QuantityOSCM = openscm_units.unit_registry.Quantity + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% editable=true slideshow={"slide_type": ""} +step: str = "calculate_sf6_like_monthly_fifteen_degree_pieces" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "cfc11" # config ID to select for this branch + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# ## Load config + +# %% editable=true slideshow={"slide_type": ""} +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + config=config, step=step, step_config_id=step_config_id +) + +config_historical_emissions = get_config_for_step_id( + config=config, step="compile_historical_emissions", step_config_id="only" +) +config_retrieve_misc = get_config_for_step_id( + config=config, step="retrieve_misc_data", step_config_id="only" +) + + +# %% [markdown] +# ## Action + +# %% [markdown] +# ### Helper functions + + +# %% +@contextmanager +def axes_vertical_split( + ncols: int = 2, +) -> Iterator[tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]]: + """Get two split axes, formatting after exiting the context""" + fig, axes = plt.subplots(ncols=ncols) + yield axes + plt.tight_layout() + plt.show() + + +# %% [markdown] +# ### Load data + +# %% +global_annual_mean_obs_network: xr.DataArray = xr.load_dataarray( # type: ignore + config_step.observational_network_global_annual_mean_file +).pint.quantify() +global_annual_mean_obs_network + +# %% +lat_grad_eofs_obs_network = xr.load_dataset( + config_step.observational_network_latitudinal_gradient_eofs_file +).pint.quantify() +lat_grad_eofs_obs_network + +# %% +historical_emissions = pd.read_csv( + config_historical_emissions.complete_historical_emissions_file +) +historical_emissions = historical_emissions[ + historical_emissions["variable"] == f"Emissions|{config_step.gas}" +] +if historical_emissions.empty: + msg = "No data found for gas, check your config" + raise AssertionError(msg) +historical_emissions + +# %% [markdown] +# ### Define some important constants + +# %% +if not config.ci: + out_years = np.arange(1, lat_grad_eofs_obs_network["year"].max() + 1) + +else: + out_years = np.arange(1750, lat_grad_eofs_obs_network["year"].max() + 1) + +out_years + +# %% +obs_network_years = 
lat_grad_eofs_obs_network["year"] +obs_network_years + +# %% +use_extensions_years = np.setdiff1d(out_years, obs_network_years) +use_extensions_years + +# %% [markdown] +# ### Extend PC zero +# +# (Zero-indexing, hence this is the first PC) +# +# This happens in a few steps. + +# %% +# Quick assertion that things are as expected +exp_n_eofs = 1 +if len(lat_grad_eofs_obs_network["eof"]) != exp_n_eofs: + raise AssertionError("Rethink") + +# %% [markdown] +# #### Use a regression against emissions to fill in the gap +# +# There is a gap between the observational network period +# and the pre-industrial period. +# We fill this using a regression against emissions. + +# %% +historical_emissions = historical_emissions.rename({"time": "year"}, axis="columns") +historical_emissions + +# %% +regression_years = np.intersect1d( + lat_grad_eofs_obs_network["year"], historical_emissions["year"] +) +regression_years + +# %% +unit = historical_emissions["unit"].unique() +assert len(unit) == 1 +unit = unit[0] + +historical_emissions_xr = xr.DataArray( + historical_emissions["value"], + dims=("year",), + coords=dict(year=historical_emissions["year"]), + attrs={"units": unit}, +).pint.quantify(unit_registry=openscm_units.unit_registry) +historical_emissions_xr = historical_emissions_xr.sel( + year=historical_emissions_xr["year"] <= regression_years.max() +) + + +historical_emissions_xr + +# %% +historical_emissions_regression_data = historical_emissions_xr.sel( + year=regression_years +) + +historical_emissions_regression_data + +# %% +pc0_obs_network = lat_grad_eofs_obs_network["principal-components"].sel(eof=0) +pc0_obs_network_regression = pc0_obs_network.sel(year=regression_years) +pc0_obs_network + +# %% +with axes_vertical_split() as axes: + historical_emissions_regression_data.plot(ax=axes[0]) + pc0_obs_network_regression.plot(ax=axes[1]) + +# %% +# The code below fixes the y-intercept to be zero, so that the latitudinal gradient is zero +# when emissions are zero. +# This is fine for gases like SF6, whose pre-industrial concentrations are zero. +# Have to be more careful when the pre-industrial concentrations are non-zero. +x = QuantityOSCM( + historical_emissions_regression_data.data.m, + str(historical_emissions_regression_data.data.units), +) +A = x.m[:, np.newaxis] +y = QuantityOSCM( + pc0_obs_network_regression.data.m, str(pc0_obs_network_regression.data.units) +) + +res = np.linalg.lstsq(A, y.m, rcond=None) +m = res[0] +m = QuantityOSCM(m, (y / x).units) +c = QuantityOSCM(0.0, y.units) + +latitudinal_gradient_pc0_total_emissions_regression = ( + local.regressors.LinearRegressionResult(m=m, c=c) +) + +fig, ax = plt.subplots() +ax.scatter(x.m, y.m, label="raw data") +ax.plot(x.m, (m * x + c).m, color="tab:orange", label="regression") +ax.plot( + np.hstack([0, x.m]), + (m * np.hstack([0, x]) + c).m, + color="tab:orange", + label="regression-extended", +) +ax.set_ylabel("PC0") +ax.set_xlabel("PRIMAP emissions") +ax.legend() + +# %% [markdown] +# Extend historical emissions back to year 1, assuming constant before 1750.
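The back-extension relies on `xarray`'s `interp` filling out-of-range years with a constant via `fill_value`, which is what makes "constant before 1750" work. A toy version of the idiom used in the next cell (values invented):

```python
import numpy as np
import xarray as xr

# Toy emissions series starting in 1750
emissions = xr.DataArray(
    [1.0, 2.0, 4.0],
    dims=("year",),
    coords={"year": [1750, 1850, 1950]},
)

# Years before 1750 get the first value, i.e. a constant backward extension;
# years in between are linearly interpolated
extended = emissions.interp(
    year=np.arange(1, 1951),
    kwargs={"fill_value": emissions.values[0]},
)

np.testing.assert_allclose(float(extended.sel(year=1)), 1.0)
np.testing.assert_allclose(float(extended.sel(year=1900)), 3.0)
```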
+ +# %% +historical_emissions_extension_years = np.union1d( + np.setdiff1d(out_years, pc0_obs_network["year"]), + historical_emissions_xr["year"], +) +historical_emissions_extension_years + +# %% +historical_emissions_extended = historical_emissions_xr.copy() +historical_emissions_extended = historical_emissions_extended.pint.dequantify().interp( + year=historical_emissions_extension_years, + kwargs={"fill_value": historical_emissions_xr.data[0].m}, +) + +with axes_vertical_split() as axes: + historical_emissions_extended.plot(ax=axes[0]) + historical_emissions_extended.sel(year=range(1950, 2023)).plot(ax=axes[1]) + +historical_emissions_extended + +# %% +years_to_fill_with_regression = np.setdiff1d( + historical_emissions_extended["year"], + pc0_obs_network["year"], +) + +years_to_fill_with_regression + +# %% +pc0_emissions_extended = ( + m + * historical_emissions_extended.sel( + year=years_to_fill_with_regression + ).pint.quantify(unit_registry=openscm_units.unit_registry) + + c +) +pc0_emissions_extended = pc0_emissions_extended.assign_coords(eof=0) +pc0_emissions_extended + +# %% [markdown] +# #### Concatenate the pieces of PC0 +# +# Join: +# +# - extended based on regression with emissions +# - raw values derived from the observational network + +# %% +allyears_pc0 = xr.concat( + [ + pc0_emissions_extended, + pc0_obs_network, + ], + "year", +) + +with axes_vertical_split() as axes: + allyears_pc0.plot(ax=axes[0]) + + pc0_emissions_extended.plot(ax=axes[1]) + pc0_obs_network.plot(ax=axes[1]) + +allyears_pc0 + +# %% [markdown] +# ### Join the PCs back together + +# %% +allyears_pcs = xr.concat([allyears_pc0], "eof").pint.dequantify().pint.quantify() +allyears_pcs + +# %% [markdown] +# ### Join PCs and EOFs back together + +# %% +allyears_pcs.name = "principal-components" +out = xr.merge([allyears_pcs, lat_grad_eofs_obs_network["eofs"]]) +out + +# %% +(out["principal-components"] @ out["eofs"]).sel(year=2022).plot() # type: ignore + +# %% +(out["principal-components"] @ out["eofs"]).sel(year=1980).plot() # type: ignore + +# %% +(out["principal-components"] @ out["eofs"]).sel(year=out["year"].min()).plot() # type: ignore + +# %% [markdown] +# Quick check that our output matches the observational network in the years they overlap. 
+ +# %% +xr.testing.assert_allclose( + (out["principal-components"] @ out["eofs"]).sel( + year=lat_grad_eofs_obs_network["year"] + ), + lat_grad_eofs_obs_network["principal-components"] + @ lat_grad_eofs_obs_network["eofs"], +) + +# %% [markdown] +# ## Save + +# %% +config_step.latitudinal_gradient_allyears_pcs_eofs_file.parent.mkdir( + exist_ok=True, parents=True +) +out.pint.dequantify().to_netcdf(config_step.latitudinal_gradient_allyears_pcs_eofs_file) +out + +# %% +config_step.latitudinal_gradient_pc0_total_emissions_regression_file.parent.mkdir( + exist_ok=True, parents=True +) +with open( + config_step.latitudinal_gradient_pc0_total_emissions_regression_file, "w" +) as fh: + fh.write( + local.config.converter_yaml.dumps( + latitudinal_gradient_pc0_total_emissions_regression + ) + ) + +latitudinal_gradient_pc0_total_emissions_regression diff --git a/notebooks/13yy_sf6-like-monthly-15-degree/1304_sf6-like_extend-global-annual-mean.py b/notebooks/13yy_sf6-like-monthly-15-degree/1304_sf6-like_extend-global-annual-mean.py new file mode 100644 index 00000000..45c53046 --- /dev/null +++ b/notebooks/13yy_sf6-like-monthly-15-degree/1304_sf6-like_extend-global-annual-mean.py @@ -0,0 +1,385 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # SF$_6$-like - extend the global-, annual-mean +# +# Extend the global-, annual-mean from the observational network back in time. +# For SF$_6$, we do this by combining the values from other data sources +# with an assumption about when zero was reached. + +# %% [markdown] +# ## Imports + +# %% +from collections.abc import Iterator +from contextlib import contextmanager +from typing import Any + +import cf_xarray.units +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import openscm_units +import pandas as pd +import pint +import pint_xarray +import xarray as xr +from pydoit_nb.config_handling import get_config_for_step_id + +import local.binned_data_interpolation +import local.binning +import local.global_mean_extension +import local.latitudinal_gradient +import local.mean_preserving_interpolation +import local.raw_data_processing +import local.seasonality +import local.xarray_space +import local.xarray_time +from local.config import load_config_from_file + +# %% +cf_xarray.units.units.define("ppm = 1 / 1000000") +cf_xarray.units.units.define("ppb = ppm / 1000") +cf_xarray.units.units.define("ppt = ppb / 1000") + +pint_xarray.accessors.default_registry = pint_xarray.setup_registry( + cf_xarray.units.units +) + +Quantity = pint.get_application_registry().Quantity # type: ignore + +# %% +QuantityOSCM = openscm_units.unit_registry.Quantity + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% editable=true slideshow={"slide_type": ""} +step: str = "calculate_sf6_like_monthly_fifteen_degree_pieces" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "sf6" # config ID to select for this branch + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# ## Load config + +# %% editable=true slideshow={"slide_type": ""} +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + 
config=config, step=step, step_config_id=step_config_id +) + + +# %% [markdown] +# ## Action + +# %% [markdown] +# ### Helper functions + + +# %% +def get_col_assert_single_value(idf: pd.DataFrame, col: str) -> Any: + """Get a column's value, asserting that it only has one value""" + res = idf[col].unique() + if len(res) != 1: + raise AssertionError + + return res[0] + + +# %% +@contextmanager +def axes_vertical_split( + ncols: int = 2, +) -> Iterator[tuple[matplotlib.axes.Axes, matplotlib.axes.Axes]]: + """Get two split axes, formatting after exiting the context""" + fig, axes = plt.subplots(ncols=ncols) + yield axes + plt.tight_layout() + plt.show() + + +# %% [markdown] +# ### Load data + +# %% +global_mean_supplement_files = ( + local.global_mean_extension.get_global_mean_supplement_files( + gas=config_step.gas, config=config + ) +) +global_mean_supplement_files + +# %% +if global_mean_supplement_files: + raise NotImplementedError + # global_mean_supplements_l = [] + # for f in global_mean_supplement_files: + # try: + # global_mean_supplements_l.append( + # local.raw_data_processing.read_and_check_global_mean_supplementing_columns( + # f + # ) + # ) + # except Exception as exc: + # msg = f"Error reading {f}" + # raise ValueError(msg) from exc + # + # global_mean_supplements = pd.concat(global_mean_supplements_l) + # # TODO: add check of gas names to processed data checker + # # global_mean_supplements["gas"] = global_mean_supplements["gas"].str.lower() + # assert global_mean_supplements["gas"].unique().tolist() == [config_step.gas] + +else: + global_mean_supplements = None + +global_mean_supplements + +# %% +global_annual_mean_obs_network = xr.load_dataarray( # type: ignore + config_step.observational_network_global_annual_mean_file +).pint.quantify() +global_annual_mean_obs_network + +# %% +lat_grad_eofs_allyears = xr.load_dataset( + config_step.latitudinal_gradient_allyears_pcs_eofs_file +).pint.quantify() +lat_grad_eofs_allyears + +# %% [markdown] +# #### Create annual-mean latitudinal gradient +# +# This can be combined with our hemispheric information if needed. + +# %% +allyears_latitudinal_gradient = ( + lat_grad_eofs_allyears["principal-components"] @ lat_grad_eofs_allyears["eofs"] +) +allyears_latitudinal_gradient + +# %% [markdown] +# ### Define some important constants + +# %% +if not config.ci: + out_years = np.arange(1, global_annual_mean_obs_network["year"].max() + 1) + +else: + out_years = np.arange(1750, global_annual_mean_obs_network["year"].max() + 1) + +out_years + +# %% +obs_network_years = global_annual_mean_obs_network["year"] +obs_network_years + +# %% +use_extensions_years = np.setdiff1d(out_years, obs_network_years) +use_extensions_years + +# %% [markdown] +# ### Extend global-, annual-mean +# +# This happens in a few steps.
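The `principal-components @ eofs` construction above leans on xarray's `@` operator, which contracts the shared `eof` dimension (an `xr.dot`) and broadcasts the rest, so annual PCs times EOFs immediately give a `(year, lat)` field. A tiny illustration with made-up coordinates:

```python
import numpy as np
import xarray as xr

rng = np.random.default_rng(0)

pcs = xr.DataArray(
    rng.normal(size=(5, 2)),
    dims=("year", "eof"),
    coords={"year": range(2000, 2005), "eof": [0, 1]},
)
eofs = xr.DataArray(
    rng.normal(size=(2, 12)),
    dims=("eof", "lat"),
    coords={"eof": [0, 1], "lat": np.arange(-82.5, 90.0, 15.0)},
)

# "@" contracts the shared "eof" dimension, leaving a (year, lat) field
gradient = pcs @ eofs
assert set(gradient.dims) == {"year", "lat"}
```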
+ +# %% +global_annual_mean_composite = global_annual_mean_obs_network.copy() + +# %% [markdown] +# #### Use other global-mean sources + +# %% +if global_mean_supplements is not None: + raise NotImplementedError +# if ( +# global_mean_supplements is not None +# and "global" in global_mean_supplements["region"].tolist() +# ): +# # Add in the global stuff here +# msg = ( +# "Add some other global-mean sources handling here " +# "(including what to do in overlap/join periods)" +# ) +# raise NotImplementedError(msg) + +# %% [markdown] +# #### Use other spatial sources + +# %% +if global_mean_supplements is not None: + raise NotImplementedError +# if ( +# global_mean_supplements is not None +# and (~global_mean_supplements["lat"].isnull()).any() +# ): +# # Add in the latitudinal information here +# msg = ( +# "Add some other spatial sources handling here. " +# "That will need to use the gradient information too " +# "(including what to do in overlap/join periods)" +# ) +# raise NotImplementedError(msg) + +# %% [markdown] +# #### Use pre-industrial value and time + +# %% +pre_ind_value = config_step.pre_industrial.value # Quantity(0, "ppt") +pre_ind_year = config_step.pre_industrial.year # 1950 + +# %% +if (global_annual_mean_composite["year"] <= pre_ind_year).any(): + pre_ind_years = global_annual_mean_composite["year"][ + np.where(global_annual_mean_composite["year"] <= pre_ind_year) + ] + msg = ( + f"You have data before your pre-industrial year, please check. {pre_ind_years=}" + ) + raise AssertionError(msg) + +# %% +pre_ind_part = ( + global_annual_mean_composite.pint.dequantify() + .interp( + year=out_years[np.where(out_years <= pre_ind_year)], + kwargs={ + "fill_value": pre_ind_value.to(global_annual_mean_composite.data.units).m + }, + ) + .pint.quantify() +) + +# %% +global_annual_mean_composite = xr.concat( + [pre_ind_part, global_annual_mean_composite], "year" +) +global_annual_mean_composite + +# %% +SHOW_AFTER_YEAR = 1950 +with axes_vertical_split() as axes: + global_annual_mean_composite.plot.line(ax=axes[0]) + global_annual_mean_composite.plot.scatter( + ax=axes[0], alpha=1.0, color="tab:orange", marker="x" + ) + + global_annual_mean_composite.sel( + year=global_annual_mean_composite["year"][ + np.where(global_annual_mean_composite["year"] >= SHOW_AFTER_YEAR) + ] + ).plot.line(ax=axes[1], color="tab:blue") + global_annual_mean_composite.sel( + year=global_annual_mean_composite["year"][ + np.where(global_annual_mean_composite["year"] >= SHOW_AFTER_YEAR) + ] + ).plot.scatter(ax=axes[1], alpha=1.0, color="tab:orange", marker="x") + +# %% [markdown] +# ### Interpolate between to fill missing values + +# %% +global_annual_mean_composite = ( + global_annual_mean_composite.pint.dequantify() + .interp(year=out_years, method="cubic") + .pint.quantify() +) +global_annual_mean_composite + +# %% +SHOW_AFTER_YEAR = 1950 +with axes_vertical_split() as axes: + global_annual_mean_composite.plot.line(ax=axes[0]) + global_annual_mean_composite.plot.scatter( + ax=axes[0], alpha=1.0, color="tab:orange", marker="x" + ) + + global_annual_mean_composite.sel( + year=global_annual_mean_composite["year"][ + np.where(global_annual_mean_composite["year"] >= SHOW_AFTER_YEAR) + ] + ).plot.line(ax=axes[1], color="tab:blue") + global_annual_mean_composite.sel( + year=global_annual_mean_composite["year"][ + np.where(global_annual_mean_composite["year"] >= SHOW_AFTER_YEAR) + ] + ).plot.scatter(ax=axes[1], alpha=1.0, color="tab:orange", marker="x") + +# %% [markdown] +# ### Create full field over all years + +# %% 
+allyears_full_field = global_annual_mean_composite + allyears_latitudinal_gradient +allyears_full_field + +# %% [markdown] +# #### Observational network +# +# We start with the field we have from the observational network. + +# %% +obs_network_full_field = allyears_latitudinal_gradient + global_annual_mean_obs_network +obs_network_full_field + +# %% [markdown] +# #### Check our full field calculation +# +# If we have got this right the field will: +# +# - be decomposable into: +# - a global-mean timeseries (with dims (year,)) +# - a latitudinal gradient (with dims (year, lat)) +# that has a spatial-mean of zero. +# This latitudinal gradient should match the latitudinal +# gradient we calculated earlier. + +# %% +tmp = allyears_full_field.copy() +tmp.name = "allyears_global_annual_mean" +allyears_global_annual_mean = local.xarray_space.calculate_global_mean_from_lon_mean( + tmp +) +allyears_global_annual_mean + +# %% [markdown] +# The residual between our full field and our annual, global-mean +# should just be the latitudinal gradient we started with. + +# %% +check = allyears_full_field - allyears_global_annual_mean +xr.testing.assert_allclose(check, allyears_latitudinal_gradient) + +# %% +tmp = allyears_latitudinal_gradient.copy() +tmp.name = "tmp" +np.testing.assert_allclose( + local.xarray_space.calculate_global_mean_from_lon_mean(tmp).data.to("ppb").m, + 0.0, + atol=1e-10, +) + +# %% [markdown] +# ## Save + +# %% +config_step.global_annual_mean_allyears_file.parent.mkdir(exist_ok=True, parents=True) +allyears_global_annual_mean.pint.dequantify().to_netcdf( + config_step.global_annual_mean_allyears_file +) +allyears_global_annual_mean diff --git a/notebooks/13yy_sf6-like-monthly-15-degree/1305_sf6-like_create-pieces-for-gridding.py b/notebooks/13yy_sf6-like-monthly-15-degree/1305_sf6-like_create-pieces-for-gridding.py new file mode 100644 index 00000000..e0dc3693 --- /dev/null +++ b/notebooks/13yy_sf6-like-monthly-15-degree/1305_sf6-like_create-pieces-for-gridding.py @@ -0,0 +1,299 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: percent +# format_version: '1.3' +# jupytext_version: 1.16.1 +# kernelspec: +# display_name: Python 3 (ipykernel) +# language: python +# name: python3 +# --- + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# # SF$_6$-like - Create pieces for gridding +# +# Here we create: +# +# - global-, annual-mean, interpolated to monthly timesteps +# - seasonality, extended over all years +# - latitudinal gradient, interpolated to monthly timesteps +# +# Then we check consistency between these pieces. 
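The consistency idea behind these checks: the seasonality averages to zero over each year and the latitudinal gradient averages to zero over space, so summing the pieces gives a field whose global, annual mean is the global, annual-mean piece itself. A schematic of that identity in plain numpy (unweighted means for simplicity; the real checks use area-weighted means via `local.xarray_space`):

```python
import numpy as np

rng = np.random.default_rng(0)
n_years, n_months, n_lats = 3, 12, 12

global_mean = rng.uniform(1.0, 2.0, size=(n_years, 1, 1))

# Gradient: zero mean over lat; seasonality: zero mean over month
gradient = rng.normal(size=(n_years, 1, n_lats))
gradient -= gradient.mean(axis=2, keepdims=True)
seasonality = rng.normal(size=(n_years, n_months, 1))
seasonality -= seasonality.mean(axis=1, keepdims=True)

full_field = global_mean + gradient + seasonality

# Averaging over month and lat recovers the global-mean piece exactly
np.testing.assert_allclose(
    full_field.mean(axis=(1, 2), keepdims=True), global_mean
)
```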
+ +# %% [markdown] +# ## Imports + +# %% +import cf_xarray.units +import matplotlib.pyplot as plt +import numpy as np +import pint +import pint_xarray +import xarray as xr +from pydoit_nb.config_handling import get_config_for_step_id + +import local.binned_data_interpolation +import local.binning +import local.latitudinal_gradient +import local.mean_preserving_interpolation +import local.raw_data_processing +import local.seasonality +import local.xarray_space +import local.xarray_time +from local.config import load_config_from_file + +# %% +cf_xarray.units.units.define("ppm = 1 / 1000000") +cf_xarray.units.units.define("ppb = ppm / 1000") +cf_xarray.units.units.define("ppt = ppb / 1000") + +pint_xarray.accessors.default_registry = pint_xarray.setup_registry( + cf_xarray.units.units +) + +Quantity = pint.get_application_registry().Quantity # type: ignore + +# %% [markdown] +# ## Define branch this notebook belongs to + +# %% editable=true slideshow={"slide_type": ""} +step: str = "calculate_sf6_like_monthly_fifteen_degree_pieces" + +# %% [markdown] +# ## Parameters + +# %% editable=true slideshow={"slide_type": ""} tags=["parameters"] +config_file: str = "../../dev-config-absolute.yaml" # config file +step_config_id: str = "sf6" # config ID to select for this branch + +# %% [markdown] editable=true slideshow={"slide_type": ""} +# ## Load config + +# %% editable=true slideshow={"slide_type": ""} +config = load_config_from_file(config_file) +config_step = get_config_for_step_id( + config=config, step=step, step_config_id=step_config_id +) + + +# %% [markdown] +# ## Action + +# %% [markdown] +# ### Load data + +# %% +global_annual_mean: xr.DataArray = xr.load_dataarray( # type: ignore + config_step.global_annual_mean_allyears_file +).pint.quantify() +global_annual_mean + +# %% +obs_network_seasonality: xr.DataArray = xr.load_dataarray( # type: ignore + config_step.observational_network_seasonality_file +).pint.quantify() +obs_network_seasonality + +# %% +lat_gradient_eofs_pcs: xr.Dataset = xr.load_dataset( + config_step.latitudinal_gradient_allyears_pcs_eofs_file +).pint.quantify() +lat_gradient_eofs_pcs + +# %% [markdown] +# ### Calculate global-, annual-mean monthly + +# %% +global_annual_mean_monthly = ( + local.mean_preserving_interpolation.interpolate_annual_mean_to_monthly( + global_annual_mean, + atol=1e-6, # avoid infinite relative error for things which are close to zero + ) +) +global_annual_mean_monthly + +# %% +fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) + +local.xarray_time.convert_year_month_to_time(global_annual_mean_monthly, calendar="proleptic_gregorian").plot( # type: ignore + ax=axes[0] +) +local.xarray_time.convert_year_to_time( + global_annual_mean, calendar="proleptic_gregorian" +).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[0]) + +local.xarray_time.convert_year_month_to_time( + global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][1:10]), + calendar="proleptic_gregorian", +).plot( + ax=axes[1] +) # type: ignore +local.xarray_time.convert_year_to_time( + global_annual_mean.sel(year=global_annual_mean_monthly["year"][1:10]), + calendar="proleptic_gregorian", +).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[1]) + +local.xarray_time.convert_year_month_to_time( + global_annual_mean_monthly.sel(year=global_annual_mean_monthly["year"][-10:]), + calendar="proleptic_gregorian", +).plot( + ax=axes[2] +) # type: ignore +local.xarray_time.convert_year_to_time( +
global_annual_mean.sel(year=global_annual_mean_monthly["year"][-10:]), + calendar="proleptic_gregorian", +).plot.scatter(x="time", color="tab:orange", zorder=3, alpha=0.5, ax=axes[2]) + +plt.tight_layout() +plt.show() + +# %% [markdown] +# ### Calculate seasonality +# +# You have to use the global-, annual-mean on a yearly time axis otherwise the time-mean of the seasonality over each year is not zero. + +# %% +obs_network_seasonality.plot() # type: ignore + +# %% +seasonality_full = global_annual_mean * obs_network_seasonality +np.testing.assert_allclose( + seasonality_full.mean("month").data.m, + 0.0, + atol=1e-6, # Match the tolerance of our mean-preserving interpolation algorithm +) + +# %% +fig, axes = plt.subplots(ncols=2, sharey=True) +local.xarray_time.convert_year_month_to_time( # type: ignore + seasonality_full.sel(year=seasonality_full["year"][-6:]) +).sel(lat=[-82.5, 7.5, 82.5]).plot(x="time", hue="lat", alpha=0.7, ax=axes[0]) + +local.xarray_time.convert_year_month_to_time( # type: ignore + seasonality_full.sel(year=range(1984, 1986)) +).sel(lat=[-82.5, 7.5, 82.5]).plot(x="time", hue="lat", alpha=0.7, ax=axes[1]) + +plt.tight_layout() + +# %% +local.xarray_time.convert_year_month_to_time( # type: ignore + seasonality_full.sel(year=seasonality_full["year"][-6:]) +).plot(x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, sharey=True) + +# %% +local.xarray_time.convert_year_month_to_time(seasonality_full, calendar="proleptic_gregorian").plot( # type: ignore + x="time", hue="lat", alpha=0.7, col="lat", col_wrap=3, sharey=True +) + +# %% [markdown] +# ### Latitudinal gradient, monthly +# +# We interpolate the PCs, then apply these to the EOFs. +# This is much faster than interpolating each latitude separately +# and ensures that we preserve a spatial-mean of zero +# in our latitudinal gradient. 
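The zero spatial mean is preserved for a simple reason: the gradient at any time is a linear combination of the EOFs, and each EOF has zero (area-weighted) spatial mean, so any PC values, including monthly-interpolated ones, keep that property. A quick unweighted demonstration:

```python
import numpy as np

rng = np.random.default_rng(0)

# Two EOFs over 12 latitudes, each centred to zero spatial mean
eofs = rng.normal(size=(2, 12))
eofs -= eofs.mean(axis=1, keepdims=True)

# Arbitrary PC values, e.g. from monthly interpolation
pcs = rng.normal(size=(120, 2))

gradient = pcs @ eofs  # (time, lat)

# Zero-mean EOFs imply a zero-mean gradient, whatever the PCs are
np.testing.assert_allclose(gradient.mean(axis=1), 0.0, atol=1e-12)
```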
+ +# %% +for degrees_freedom_scalar in np.arange(1.1, 2.1, 0.1): + try: + pcs_monthly = ( + lat_gradient_eofs_pcs["principal-components"] # type: ignore + .groupby("eof", squeeze=False) + .apply( + local.mean_preserving_interpolation.interpolate_annual_mean_to_monthly, + degrees_freedom_scalar=degrees_freedom_scalar, + atol=1e-7, + ) + ) + print(f"Run succeeded with {degrees_freedom_scalar=}") + break + except AssertionError: + print(f"Run failed with {degrees_freedom_scalar=}") + continue + +else: + msg = "Mean-preserving interpolation failed, consider increasing degrees_freedom_scalar" + raise AssertionError(msg) + +pcs_monthly + +# %% +pcs_annual = lat_gradient_eofs_pcs["principal-components"] + +fig, axes = plt.subplots(ncols=3, figsize=(12, 4)) + +local.xarray_time.convert_year_month_to_time( + pcs_monthly, calendar="proleptic_gregorian" +).plot(ax=axes[0], hue="eof") +local.xarray_time.convert_year_to_time( + pcs_annual, calendar="proleptic_gregorian" +).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[0]) + +local.xarray_time.convert_year_month_to_time( + pcs_monthly.sel(year=pcs_monthly["year"][1:10]), calendar="proleptic_gregorian" +).plot(ax=axes[1], hue="eof") +local.xarray_time.convert_year_to_time( + pcs_annual.sel(year=pcs_monthly["year"][1:10]), calendar="proleptic_gregorian" +).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[1]) + +local.xarray_time.convert_year_month_to_time( + pcs_monthly.sel(year=pcs_monthly["year"][-10:]), calendar="proleptic_gregorian" +).plot(ax=axes[2], hue="eof") +local.xarray_time.convert_year_to_time( + pcs_annual.sel(year=pcs_monthly["year"][-10:]), calendar="proleptic_gregorian" +).plot.scatter(x="time", hue="eof", zorder=3, alpha=0.5, ax=axes[2]) + +plt.tight_layout() +plt.show() + +# %% +latitudinal_gradient_monthly = pcs_monthly @ lat_gradient_eofs_pcs["eofs"] + +# Ensure spatial mean is zero +tmp = latitudinal_gradient_monthly +tmp.name = "latitudinal-gradient" +np.testing.assert_allclose( + local.xarray_space.calculate_global_mean_from_lon_mean(tmp).data.to("ppb").m, + 0.0, + atol=1e-10, +) + +latitudinal_gradient_monthly + +# %% +local.xarray_time.convert_year_month_to_time( + latitudinal_gradient_monthly, calendar="proleptic_gregorian" +).plot(hue="lat") + +# %% [markdown] +# ### Save + +# %% +config_step.global_annual_mean_allyears_monthly_file.parent.mkdir( + exist_ok=True, parents=True +) +global_annual_mean_monthly.pint.dequantify().to_netcdf( + config_step.global_annual_mean_allyears_monthly_file +) +global_annual_mean_monthly + +# %% +config_step.seasonality_allyears_fifteen_degree_monthly_file.parent.mkdir( + exist_ok=True, parents=True +) +seasonality_full.pint.dequantify().to_netcdf( + config_step.seasonality_allyears_fifteen_degree_monthly_file +) +seasonality_full + +# %% +config_step.latitudinal_gradient_fifteen_degree_allyears_monthly_file.parent.mkdir( + exist_ok=True, parents=True +) +latitudinal_gradient_monthly.pint.dequantify().to_netcdf( + config_step.latitudinal_gradient_fifteen_degree_allyears_monthly_file +) +latitudinal_gradient_monthly diff --git a/notebooks/30yy_grid/3001_crunch-grids.py b/notebooks/30yy_grid/3001_crunch-grids.py index 88d9df44..fa3a76f1 100644 --- a/notebooks/30yy_grid/3001_crunch-grids.py +++ b/notebooks/30yy_grid/3001_crunch-grids.py @@ -53,6 +53,7 @@ # %% cf_xarray.units.units.define("ppm = 1 / 1000000") cf_xarray.units.units.define("ppb = ppm / 1000") +cf_xarray.units.units.define("ppt = ppb / 1000") pint_xarray.accessors.default_registry = 
pint_xarray.setup_registry( cf_xarray.units @@ -80,10 +81,17 @@ config=config, step=step, step_config_id=step_config_id ) +if config_step.gas in ("co2", "ch4", "n2o"): + step = f"calculate_{config_step.gas}_monthly_fifteen_degree_pieces" + step_config_id = "only" +else: + step = "calculate_sf6_like_monthly_fifteen_degree_pieces" + step_config_id = config_step.gas + config_gridding_pieces_step = get_config_for_step_id( config=config, - step=f"calculate_{config_step.gas}_monthly_fifteen_degree_pieces", - step_config_id="only", + step=step, + step_config_id=step_config_id, ) @@ -117,9 +125,21 @@ # %% [markdown] # ### 15° monthly file +# %% +seasonality_monthly_use = seasonality_monthly.copy() +seasonality_monthly_use_month_mean = seasonality_monthly_use.mean("month") +if np.isclose(seasonality_monthly_use_month_mean.data.m, 0.0, atol=1e-7).all(): + # Force the data to zero. This is a bit of a hack, but also basically fine. + print(f"Applying max shift of {seasonality_monthly_use_month_mean.max()}") + seasonality_monthly_use = ( + seasonality_monthly_use - seasonality_monthly_use_month_mean + ) + +seasonality_monthly_use.mean("month") + # %% gridding_values = ( - xr.merge([seasonality_monthly.copy(), lat_grad_fifteen_degree_monthly]) + xr.merge([seasonality_monthly_use.copy(), lat_grad_fifteen_degree_monthly]) .cf.add_bounds("lat") .pint.quantify({"lat_bounds": "deg"}) ) @@ -212,6 +232,7 @@ .apply(local.xarray_space.calculate_global_mean_from_lon_mean) .transpose("year", "month", "lat_bins") .data.m, + atol=1e-6, # Tolerance of our mean-preserving algorithm ) # %% diff --git a/notebooks/40yy_write-input4mips/4001_write-input4mips-files.py b/notebooks/40yy_write-input4mips/4001_write-input4mips-files.py index 00c6f7ba..1099d3c3 100644 --- a/notebooks/40yy_write-input4mips/4001_write-input4mips-files.py +++ b/notebooks/40yy_write-input4mips/4001_write-input4mips-files.py @@ -31,6 +31,8 @@ from functools import partial import cf_xarray.units +import cftime +import numpy as np import pint_xarray import tqdm.autonotebook as tqdman import xarray as xr @@ -56,6 +58,7 @@ # %% cf_xarray.units.units.define("ppm = 1 / 1000000") cf_xarray.units.units.define("ppb = ppm / 1000") +cf_xarray.units.units.define("ppt = ppb / 1000") pint_xarray.accessors.default_registry = pint_xarray.setup_registry( cf_xarray.units @@ -72,7 +75,7 @@ # %% editable=true slideshow={"slide_type": ""} tags=["parameters"] config_file: str = "../../dev-config-absolute.yaml" # config file -step_config_id: str = "co2" # config ID to select for this branch +step_config_id: str = "sf6" # config ID to select for this branch # %% [markdown] editable=true slideshow={"slide_type": ""} # ## Load config @@ -137,9 +140,30 @@ def chop_time_axis(inp: xr.DataArray) -> xr.DataArray: gmnhsh_data_raw_chopped = chop_time_axis(gmnhsh_data_raw) gmnhsh_annual_data_raw_chopped = chop_time_axis(gmnhsh_annual_data_raw) + # %% [markdown] # ### Convert everything to a time axis + +# %% +def get_displayable_dataarray(inp: xr.DataArray) -> xr.DataArray: + """ + Get a :obj:`xr.DataArray` which we can display + + There is some bug in xarray's HTML representation which + means this doesn't work with a proleptic_gregorian calendar.
+ """ + res = inp.copy() + res["time"] = np.array( + [ + cftime.datetime(v.year, v.month, v.day, v.hour, calendar="standard") + for v in inp["time"].values + ] + ) + + return res + + # %% day = 15 fifteen_degree_data = local.xarray_time.convert_year_month_to_time( @@ -151,13 +175,17 @@ def chop_time_axis(inp: xr.DataArray) -> xr.DataArray: gmnhsh_data = local.xarray_time.convert_year_month_to_time( gmnhsh_data_raw_chopped, day=day ) -fifteen_degree_data +get_displayable_dataarray(fifteen_degree_data) # %% gmnhsh_annual_data = local.xarray_time.convert_year_to_time( - gmnhsh_annual_data_raw_chopped, month=7, day=2, hour=12 + gmnhsh_annual_data_raw_chopped, + month=7, + day=2, + hour=12, + calendar="proleptic_gregorian", ) -gmnhsh_annual_data +get_displayable_dataarray(gmnhsh_annual_data) # %% [markdown] # ### Set common metadata @@ -197,6 +225,10 @@ def chop_time_axis(inp: xr.DataArray) -> xr.DataArray: "co2": "mole_fraction_of_carbon_dioxide_in_air", "ch4": "mole_fraction_of_methane_in_air", "n2o": "mole_fraction_of_nitrous_oxide_in_air", + "sf6": "mole_fraction_of_sulfur_hexafluoride_in_air", + "cfc11": "mole_fraction_of_cfc11_in_air", + "cfc12": "mole_fraction_of_cfc12_in_air", + "hfc134a": "mole_fraction_of_hfc134a_in_air", } # %% [markdown] @@ -226,8 +258,8 @@ def chop_time_axis(inp: xr.DataArray) -> xr.DataArray: {variable_name_raw: variable_name_output} ) da_to_write["time"].encoding = { - "calendar": "standard", - "units": "days since 2010-01-01", + "calendar": "proleptic_gregorian", + "units": "days since 1850-01-01", } # TODO: use inference again once I know how it is meant to work # metadata_inferred, metadata_inferred_optional = infer_metadata_from_dataset( diff --git a/poetry.lock b/poetry.lock index 1efec2c8..fd1fa17e 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1291,13 +1291,13 @@ files = [ [[package]] name = "input4mips-validation" -version = "0.3.1" +version = "0.4.0" description = "Validation of input4MIPs data (checking file formats, metadata etc.)." 
optional = false
python-versions = "<4.0,>=3.9"
files = [
-    {file = "input4mips_validation-0.3.1-py3-none-any.whl", hash = "sha256:de93046a7196cd303c25d44fc409bb85adc6dfeb4f5c5da01b460108df93251e"},
-    {file = "input4mips_validation-0.3.1.tar.gz", hash = "sha256:05c7367159f04f60a8e817d3fefc30cde7abe3b07d71790bee804d0e87d81a2f"},
+    {file = "input4mips_validation-0.4.0-py3-none-any.whl", hash = "sha256:28374a33a42fafdd33ea6f9ae3c22c18e293686e061a8b10f5f5b1c2833564f1"},
+    {file = "input4mips_validation-0.4.0.tar.gz", hash = "sha256:0e4f38cb44dd3c3aef4f494b818ce8e07bd99e3249c393b11facd8c6cff64e9b"},
 ]

[package.dependencies]
@@ -1306,7 +1306,7 @@
 cf-xarray = ">=0.8"
 cftime = ">=1.6"
 loguru = ">=0.7"
 netcdf4 = ">=1.0"
-numpy = ">=1.23"
+numpy = ">=1.23,<2.0"
 pint-xarray = ">=0.3"
 pooch = ">=1.0"
 typer = ">=0.9.0,<0.10.0"
@@ -4273,4 +4273,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.11,<3.12"
-content-hash = "cb9b5a32acdfff3e4ff2127c4a1ff0447c7058f5f6581345ffe1dd22b063313b"
+content-hash = "8d36bae15e1e801604d4f1a5221896e37ebe0da73d92f32bbe817b75d5cee293"
diff --git a/pyproject.toml b/pyproject.toml
index 6e48630d..7139036a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@
 xarray = "2024.1.1"
 pint-xarray = "0.3"
 nc-time-axis = "1.4.1"
 netcdf4 = "1.6.5"
-input4mips-validation = "0.3.1"
+input4mips-validation = "0.4.0"
 pydoit-nb = "0.3.4"
 carpet-concentrations = "0.5.1"
 beautifulsoup4 = "4.12.3"
diff --git a/scripts/create-dev-ci-config-absolute.py b/scripts/create-dev-ci-config-absolute.py
index bcc4be68..1941c6af 100644
--- a/scripts/create-dev-ci-config-absolute.py
+++ b/scripts/create-dev-ci-config-absolute.py
@@ -53,7 +53,7 @@
     (
         "ch4",
         "in-situ",
-        "c8ad74288d860c63b6a027df4d7bf6742e772fc4e3f99a4052607a382d7fefb2",
+        "0ac02608ec33f6057e496463e2cf755970d40f4e653a6a5a7d6d14ec519b863e",
     ),
     (
         "n2o",
diff --git a/scripts/write-config.py b/scripts/write-config.py
index 4ec3deb1..852c3c8a 100644
--- a/scripts/write-config.py
+++ b/scripts/write-config.py
@@ -2,6 +2,8 @@
 Write our configuration files
 """

+# Have to set the pint registry before doing other imports, hence the funny order
+# ruff: noqa: E402
 from __future__ import annotations

 from pathlib import Path
@@ -10,11 +12,17 @@
 import pint
 from pydoit_nb.config_handling import insert_path_prefix

+pint.set_application_registry(openscm_units.unit_registry)
+
+
 import local
 from local.config import Config, converter_yaml
 from local.config.plot_input_data_overviews import PlotInputDataOverviewsConfig
 from local.config_creation.agage_handling import create_agage_handling_config
 from local.config_creation.ale_handling import RETRIEVE_AND_EXTRACT_ALE_STEPS
+from local.config_creation.compile_historical_emissions import (
+    COMPILE_HISTORICAL_EMISSIONS_STEPS,
+)
 from local.config_creation.crunch_grids import create_crunch_grids_config
 from local.config_creation.epica_handling import RETRIEVE_AND_PROCESS_EPICA_STEPS
 from local.config_creation.gage_handling import RETRIEVE_AND_EXTRACT_GAGE_STEPS
@@ -30,14 +38,13 @@
 from local.config_creation.retrieve_misc_data import RETRIEVE_MISC_DATA_STEPS
 from local.config_creation.write_input4mips import create_write_input4mips_config

-pint.set_application_registry(openscm_units.unit_registry)
-

 def create_dev_config() -> Config:
     """
     Create our (relative) dev config
     """
-    gases_to_write = ("co2", "ch4", "n2o")
+    gases_to_write = ("co2", "ch4", "n2o", "sf6", "cfc11", "cfc12", "hfc134a")
+    # cfc11 next

     start_year = 1
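+    # Note on how the gases above are routed (this mirrors the
+    # step-selection logic added in the gridding and crunch_grids hunks
+    # of this change, it is not new behaviour): co2, ch4 and n2o each
+    # use their own calculate_<gas>_monthly_fifteen_degree_pieces step
+    # with step_config_id "only", while sf6, cfc11, cfc12 and hfc134a
+    # all share calculate_sf6_like_monthly_fifteen_degree_pieces, with
+    # step_config_id set to the gas name.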
end_year = 2022 @@ -48,12 +55,32 @@ def create_dev_config() -> Config: ("ch4", "in-situ"), ("ch4", "surface-flask"), ("n2o", "hats"), - ("n2o", "surface-flask"), + # # Don't use N2O surface flask to avoid double counting + # ("n2o", "surface-flask"), + ("sf6", "hats"), + # # Don't use SF6 surface flask to avoid double counting + # ("sf6", "surface-flask"), + ("cfc11", "hats"), + ("cfc12", "hats"), + ("hfc134a", "hats"), ) ) retrieve_and_extract_agage_data = create_agage_handling_config( - data_sources=(("ch4", "gc-md", "monthly"), ("n2o", "gc-md", "monthly")) + data_sources=( + ("ch4", "gc-md", "monthly"), + ("n2o", "gc-md", "monthly"), + ("sf6", "gc-md", "monthly"), + ("sf6", "gc-ms-medusa", "monthly"), + ("cfc11", "gc-md", "monthly"), + ("cfc11", "gc-ms-medusa", "monthly"), + ("cfc11", "gc-ms", "monthly"), + ("cfc12", "gc-md", "monthly"), + ("cfc12", "gc-ms-medusa", "monthly"), + ("cfc12", "gc-ms", "monthly"), + ("hfc134a", "gc-ms-medusa", "monthly"), + ("hfc134a", "gc-ms", "monthly"), + ) ) smooth_law_dome_data = create_smooth_law_dome_data_config( @@ -79,6 +106,7 @@ def create_dev_config() -> Config: retrieve_and_process_epica_data=RETRIEVE_AND_PROCESS_EPICA_STEPS, retrieve_and_process_neem_data=RETRIEVE_AND_PROCESS_NEEM_STEPS, plot_input_data_overviews=[PlotInputDataOverviewsConfig(step_config_id="only")], + compile_historical_emissions=COMPILE_HISTORICAL_EMISSIONS_STEPS, smooth_law_dome_data=smooth_law_dome_data, **monthly_fifteen_degree_pieces_configs, crunch_grids=create_crunch_grids_config(gases=gases_to_write), @@ -92,7 +120,8 @@ def create_ci_config() -> Config: """ Create our (relative) CI config """ - gases_to_write = ("co2", "ch4", "n2o") + gases_to_write = ("co2", "ch4", "n2o", "sf6", "cfc11", "cfc12", "hfc134a") + # cfc11 next start_year = 1750 end_year = 2022 @@ -103,12 +132,32 @@ def create_ci_config() -> Config: ("ch4", "in-situ"), ("ch4", "surface-flask"), ("n2o", "hats"), - ("n2o", "surface-flask"), + # # Don't use N2O surface flask to avoid double counting + # ("n2o", "surface-flask"), + ("sf6", "hats"), + # # Don't use SF6 surface flask to avoid double counting + # ("sf6", "surface-flask"), + ("cfc11", "hats"), + ("cfc12", "hats"), + ("hfc134a", "hats"), ) ) retrieve_and_extract_agage_data = create_agage_handling_config( - data_sources=(("ch4", "gc-md", "monthly"), ("n2o", "gc-md", "monthly")) + data_sources=( + ("ch4", "gc-md", "monthly"), + ("n2o", "gc-md", "monthly"), + ("sf6", "gc-md", "monthly"), + ("sf6", "gc-ms-medusa", "monthly"), + ("cfc11", "gc-md", "monthly"), + ("cfc11", "gc-ms-medusa", "monthly"), + ("cfc11", "gc-ms", "monthly"), + ("cfc12", "gc-md", "monthly"), + ("cfc12", "gc-ms-medusa", "monthly"), + ("cfc12", "gc-ms", "monthly"), + ("hfc134a", "gc-ms-medusa", "monthly"), + ("hfc134a", "gc-ms", "monthly"), + ) ) smooth_law_dome_data = create_smooth_law_dome_data_config( @@ -134,6 +183,7 @@ def create_ci_config() -> Config: retrieve_and_process_epica_data=RETRIEVE_AND_PROCESS_EPICA_STEPS, retrieve_and_process_neem_data=RETRIEVE_AND_PROCESS_NEEM_STEPS, plot_input_data_overviews=[PlotInputDataOverviewsConfig(step_config_id="only")], + compile_historical_emissions=COMPILE_HISTORICAL_EMISSIONS_STEPS, smooth_law_dome_data=smooth_law_dome_data, **monthly_fifteen_degree_pieces_configs, crunch_grids=create_crunch_grids_config(gases=gases_to_write), diff --git a/src/local/config/base.py b/src/local/config/base.py index a3f6dac9..0372aca7 100644 --- a/src/local/config/base.py +++ b/src/local/config/base.py @@ -20,6 +20,10 @@ ) from 
.calculate_co2_monthly_15_degree import CalculateCO2MonthlyFifteenDegreePieces from .calculate_n2o_monthly_15_degree import CalculateN2OMonthlyFifteenDegreePieces +from .calculate_sf6_like_monthly_15_degree import ( + CalculateSF6LikeMonthlyFifteenDegreePieces, +) +from .compile_historical_emissions import CompileHistoricalEmissionsConfig from .crunch_grid import GridCrunchingConfig from .plot_input_data_overviews import PlotInputDataOverviewsConfig from .process_noaa_hats_data import ProcessNOAAHATSDataConfig @@ -188,6 +192,15 @@ class Config: ) """Configurations to use for the plotting step""" + compile_historical_emissions: list[CompileHistoricalEmissionsConfig] = field( + validator=[ + make_attrs_validator_compatible_single_input( + assert_step_config_ids_are_unique + ) + ] + ) + """Configurations to use for the compilation of historical emissions data""" + smooth_law_dome_data: list[SmoothLawDomeDataConfig] = field( validator=[ make_attrs_validator_compatible_single_input( @@ -230,6 +243,17 @@ class Config: ) """Configurations to use for calculating the 15 degree, monthly data for N2O""" + calculate_sf6_like_monthly_fifteen_degree_pieces: list[ + CalculateSF6LikeMonthlyFifteenDegreePieces + ] = field( + validator=[ + make_attrs_validator_compatible_single_input( + assert_step_config_ids_are_unique + ) + ] + ) + """Configurations to use for calculating the 15 degree, monthly data for gases we handle like SF6""" + crunch_grids: list[GridCrunchingConfig] = field( validator=[ make_attrs_validator_compatible_single_input( diff --git a/src/local/config/calculate_sf6_like_monthly_15_degree.py b/src/local/config/calculate_sf6_like_monthly_15_degree.py new file mode 100644 index 00000000..47ee8af4 --- /dev/null +++ b/src/local/config/calculate_sf6_like_monthly_15_degree.py @@ -0,0 +1,101 @@ +""" +Config for the calculation of the 15 degree monthly data for gases we handle like SF6 +""" + +from __future__ import annotations + +from pathlib import Path + +import pint +from attrs import frozen + + +@frozen +class CalculateSF6LikeMonthlyFifteenDegreePieces: + """ + Configuration class for the calculation of the 15 degree monthly data pieces for gases we handle like SF6 + """ + + step_config_id: str + """ + ID for this configuration of the step + + Must be unique among all configurations for this step + """ + + gas: str + """Gas to which this config applies (a bit redundant, but handy to be explicit)""" + + pre_industrial: SF6LikePreIndustrialConfig + """Pre-industrial values to use with this gas""" + + processed_bin_averages_file: Path + """Path in which to save the spatial bin averages from the observational networks""" + + processed_all_data_with_bins_file: Path + """Path in which to save all the data from the observational networks, incl. 
bin information""" + + allow_poleward_extension: bool + """Whether to allow the data to be extended one latitude poleward to fill data gaps""" + + observational_network_interpolated_file: Path + """Path in which to save the interpolated observational network data""" + + observational_network_global_annual_mean_file: Path + """Path in which to save the global-mean of the observational network data""" + + lat_gradient_n_eofs_to_use: int + """Number of EOFs to use for latitudinal gradient calculations""" + + observational_network_latitudinal_gradient_eofs_file: Path + """Path in which to save the latitudinal gradient EOFs of the observational network data""" + + observational_network_seasonality_file: Path + """Path in which to save the seasonality of the observational network data""" + + latitudinal_gradient_allyears_pcs_eofs_file: Path + """ + Path in which to save the latitudinal gradient information for all years + + This contains the PCs and EOFs separately, + but the PCs have been extended to cover all the years of interest. + """ + + latitudinal_gradient_pc0_total_emissions_regression_file: Path + """ + Path in which to save the regression between pc0 and global emissions of the gas + """ + + global_annual_mean_allyears_file: Path + """Path in which to save the global-, annual-mean, extended over all years""" + + global_annual_mean_allyears_monthly_file: Path + """ + Path in which to save the global-, annual-mean, interpolated to monthly steps for all years + """ + + seasonality_allyears_fifteen_degree_monthly_file: Path + """ + Path for the seasonality on a 15 degree grid, interpolated to monthly steps for all years + """ + + latitudinal_gradient_fifteen_degree_allyears_monthly_file: Path + """ + Path for the latitudinal gradient on a 15 degree grid, interpolated to monthly steps for all years + """ + + +@frozen +class SF6LikePreIndustrialConfig: + """ + Pre-industrial configuration for a gas handled like SF6 + """ + + value: pint.registry.UnitRegistry.Quantity + """Pre-industrial value""" + + year: int + """Year, before which the pre-industrial value should apply""" + + source: str + """The source of the pre-industrial value""" diff --git a/src/local/config/compile_historical_emissions.py b/src/local/config/compile_historical_emissions.py new file mode 100644 index 00000000..e8239ce1 --- /dev/null +++ b/src/local/config/compile_historical_emissions.py @@ -0,0 +1,26 @@ +""" +Config for the compilation of historical emissions data +""" + +from __future__ import annotations + +from pathlib import Path + +from attrs import frozen + + +@frozen +class CompileHistoricalEmissionsConfig: + """ + Configuration class for the compilation of historical emissions data + """ + + step_config_id: str + """ + ID for this configuration of the step + + Must be unique among all configurations for this step + """ + + complete_historical_emissions_file: Path + """Path in which to save the complete historical emissions""" diff --git a/src/local/config_creation/agage_handling.py b/src/local/config_creation/agage_handling.py index b0c7e580..c58273ad 100644 --- a/src/local/config_creation/agage_handling.py +++ b/src/local/config_creation/agage_handling.py @@ -55,6 +55,227 @@ known_hash="e4cdc474f71d6f80ac63fe13b7fa61a86ce0361b7995874bf074a01c69facac3", ), ], + ("sf6", "gc-md", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/capegrim-sf6-ecd/ascii/AGAGE-GC-ECD-SF6_CGO_sf6_mon.txt", + known_hash="9ec255bbd55447d8ac521a4683951e0b0aa682d8252a1072e5fa555b90af5aa1", + 
) + ], + ("sf6", "gc-ms-medusa", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/barbados/ascii/AGAGE-GCMS-Medusa_RPB_sf6_mon.txt", + known_hash="1611fb6ed6087b506af19ca8a08cdf750e2c185b136b2460ba013ace39b47714", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/capegrim/ascii/AGAGE-GCMS-Medusa_CGO_sf6_mon.txt", + known_hash="d387ab096cc53fae4efa5026eaaba4f3df0ceec7a1afcfc1128687372e6505d3", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_sf6_mon.txt", + known_hash="b556daf22abd2edf0874b875a60bd02f246315c4c5a9748273362cb210c7e077", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_sf6_mon.txt", + known_hash="030a37af25e2d806d8ac65be66f264f449923a1b8b077c92c647577d4efe3720", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/macehead/ascii/AGAGE-GCMS-Medusa_MHD_sf6_mon.txt", + known_hash="42d1f57226972df7d16786310e95288db0b604033d8fac82b8b03630d36d908a", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_sf6_mon.txt", + known_hash="cb05383d875d6020a0017942551f909954354ed2701a12754da6bdb80e45612f", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/samoa/ascii/AGAGE-GCMS-Medusa_SMO_sf6_mon.txt", + known_hash="95fa2ccc93fe2dfefcb64d1405e81e0bc556a1892e5b4818312003de92eaf23b", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_sf6_mon.txt", + known_hash="12270c0ab91decaaffc0f47710c27de9147a125938c3e98b1c10a98a2922f441", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/trinidad/ascii/AGAGE-GCMS-Medusa_THD_sf6_mon.txt", + known_hash="da146fc89d0e2b2e87846c6e5fc5712820a5f7bff69f054d90ac0ce80a1cf2a7", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_sf6_mon.txt", + known_hash="2de77c7f417510878c18d4ea452815ac4555ae17719e6786548b063ee471e5bf", + ), + ], + ("cfc11", "gc-md", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/barbados/ascii/AGAGE-GCMD_RPB_cfc-11_mon.txt", + known_hash="7f52297786487e9ede8a785c89b1d793250c4198223bafaa265cdcc268bbc978", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/capegrim/ascii/AGAGE-GCMD_CGO_cfc-11_mon.txt", + known_hash="ebc838159a6ccc0bb98ac16203e927abea3371aa8aea801db572c816761074cd", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/macehead/ascii/AGAGE-GCMD_MHD_cfc-11_mon.txt", + known_hash="aa6ef6875861e0d127fadc4697467b08ba8272d0fd3be91dd63713490de86645", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/samoa/ascii/AGAGE-GCMD_SMO_cfc-11_mon.txt", + known_hash="d1722aa15bf3a77415b97f0f9a1c1e58912e0ace054b00c8767e75666f877318", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/trinidad/ascii/AGAGE-GCMD_THD_cfc-11_mon.txt", + known_hash="9929b38d196ef1397615b51988631f1e790797d86c260f57b387074cb667ef56", + ), + ], + ("cfc11", "gc-ms-medusa", "monthly"): [ + URLSource( + 
url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_cfc-11_mon.txt", + known_hash="6d8b653daf80d3fd8c295c91e8842e8c81242c36a543da88b19964c1de7ef7ad", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_cfc-11_mon.txt", + known_hash="1f2dd4650b49c7ee5d9e5a763e1be3daeb72f0243ea249ab3b9c9d586e71f8c4", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_cfc-11_mon.txt", + known_hash="35231a8b2a6776e815c843ad7ba0f99378733dbbaa6c112f33d30d0558e66ad8", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_cfc-11_mon.txt", + known_hash="33fbb30c985d2ae36a48f5a7e6e92e66ea84c83004db006ca2fb1de72f922112", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_cfc-11_mon.txt", + known_hash="f1eb53ecfa4294ef3581b536804fccac36519dbc4ddafa856c4c4aeb9e7aa048", + ), + ], + ("cfc11", "gc-ms", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/mtecimone/ascii/AGAGE-GCMS-MteCimone_CMN_cfc-11_mon.txt", + known_hash="3394d57cc17222ccc5de8f98edbe26131afc63e718ff9d4563727a098704aa93", + ) + ], + ("cfc12", "gc-md", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/barbados/ascii/AGAGE-GCMD_RPB_cfc-12_mon.txt", + known_hash="504f4d3db1bea293392d3efaf151bfad7e5f2277224cc765877e00e69fd02be0", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/capegrim/ascii/AGAGE-GCMD_CGO_cfc-12_mon.txt", + known_hash="01b70bb4ba8abd98c02f07259c27ce8a105d4208e7bf423f406e4b8da6b480f3", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/macehead/ascii/AGAGE-GCMD_MHD_cfc-12_mon.txt", + known_hash="400e82052314190309650b596a1e2e8bb6ed06e1bff67d831df8b5e1d5738d1b", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/samoa/ascii/AGAGE-GCMD_SMO_cfc-12_mon.txt", + known_hash="28823f27e07f83db07b2507f038cf046bf51697bca8cc19e7b50414e1aea2775", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-md/monthly/trinidad/ascii/AGAGE-GCMD_THD_cfc-12_mon.txt", + known_hash="737e826ab87381fd98578363c5bd040d92c33356ede9af3aae802824a47887c9", + ), + ], + ("cfc12", "gc-ms-medusa", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_cfc-12_mon.txt", + known_hash="50344532103b093e6442f8f329a4bdb41c578a72f1be77836bb3170b20abab57", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_cfc-12_mon.txt", + known_hash="fa7ac9be9f3d650d4cbd5840b7905968f28a6a6a88a8f5e43ea06fa0f1f29ac2", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_cfc-12_mon.txt", + known_hash="f46822e1c50c6b51db99066ffa671548709d6455fcb6b3989f75643383b49bab", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_cfc-12_mon.txt", + known_hash="5db4ff4ce96cc261c386a62d9d1b59a8458d8db2edc2db7c5b62f6e686ba2989", + ), + URLSource( + 
url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_cfc-12_mon.txt", + known_hash="d018493146fcee2f9750275dd561dd74ff827c207c28435727ba1bb9164e07d2", + ), + ], + ("cfc12", "gc-ms", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/mtecimone/ascii/AGAGE-GCMS-MteCimone_CMN_cfc-12_mon.txt", + known_hash="0325346db119008b4e8a7dd7b9641b8eb097b6b03e69e6af29f240155ace3a2e", + ) + ], + ("hfc134a", "gc-ms-medusa", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/barbados/ascii/AGAGE-GCMS-Medusa_RPB_hfc-134a_mon.txt", + known_hash="d2f39aa42e4ae6182084d595bb51d7a928913819ec30dedc5fab2b09ebce50fa", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/capegrim/ascii/AGAGE-GCMS-Medusa_CGO_hfc-134a_mon.txt", + known_hash="289fd88e1f6e8aa0fce15dadc1e9c1d246108d9a9d615327c4e242dbbbf8095c", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/gosan/ascii/AGAGE-GCMS-Medusa_GSN_hfc-134a_mon.txt", + known_hash="bfdb55db913a285192ac5cdb9456d452cf75d190f5df2d7ad58b10b91bb5211b", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/jungfraujoch/ascii/AGAGE-GCMS-Medusa_JFJ_hfc-134a_mon.txt", + known_hash="7078f3636bbd582f09f9706c4a36bd07a3ea2194800d4dc485a01ac6cefbd3be", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/macehead/ascii/AGAGE-GCMS-Medusa_MHD_hfc-134a_mon.txt", + known_hash="b30dc3b0829d52cda6554a63558f20e2e23713562e6d08cc28beeecb770b9dbe", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/mtecimone/ascii/AGAGE-GCMS-Medusa_CMN_hfc-134a_mon.txt", + known_hash="67bcdb81b91af3a417cf60a7b3fad1c3feb7e6b1ba53ba509c75da9c67754d03", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/samoa/ascii/AGAGE-GCMS-Medusa_SMO_hfc-134a_mon.txt", + known_hash="3aefaadbb40df63585397d3fe8ef4f5ce9980264bd68d2f281d7f3e40177267a", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/tacolneston/ascii/AGAGE-GCMS-Medusa_TAC_hfc-134a_mon.txt", + known_hash="31c4b1d2c15cfe77869a72ee60f6e749fbb775dc5a62451637e72097c3cd0d21", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/trinidad/ascii/AGAGE-GCMS-Medusa_THD_hfc-134a_mon.txt", + known_hash="b61be1912adf36961a37127f78daa42f903b044e796493c7a46ae180c036aa72", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms-medusa/monthly/zeppelin/ascii/AGAGE-GCMS-Medusa_ZEP_hfc-134a_mon.txt", + known_hash="724b699558e9c26cccaff47ef18ce73812cc1a0b25fdb3a2e93d01893f0c564d", + ), + ], + ("hfc134a", "gc-ms", "monthly"): [ + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/capegrim/ascii/AGAGE-GCMS-ADS_CGO_hfc-134a_mon.txt", + known_hash="8c6405431ea194670ac50816bf7a01556bf240d95110098c6f8203a27c73eefd", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/jungfraujoch/ascii/AGAGE-GCMS-ADS_JFJ_hfc-134a_mon.txt", + known_hash="7b0a36cc62629440de2d4ed11c4a52744d38bdac0dc096ee35144bced8ebb032", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/macehead/ascii/AGAGE-GCMS-ADS_MHD_hfc-134a_mon.txt", + 
known_hash="8ffe2a817fe60d8427a88fdad07937d1103d70cf7780cfac409fe20676bdc4b4", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/mtecimone/ascii/AGAGE-GCMS-MteCimone_CMN_hfc-134a_mon.txt", + known_hash="e69c58e0a4f96d9782645d837e8189582e8471ff4ad22b3f9230bd038ccb4600", + ), + URLSource( + url="https://agage2.eas.gatech.edu/data_archive/agage/gc-ms/monthly/zeppelin/ascii/AGAGE-GCMS-ADS_ZEP_hfc-134a_mon.txt", + known_hash="2cce94a135d6a8f48dcfd487e9196265c8f290d1f29a0b98ef5020607d615516", + ), + ], + # ("cfc11", "gc-md", "monthly"): [ + # + # ], + # ("cfc11", "gc-ms-medusa", "monthly"): [ + # + # ], + # ("cfc11", "gc-ms", "monthly"): [ + # + # ], } diff --git a/src/local/config_creation/compile_historical_emissions.py b/src/local/config_creation/compile_historical_emissions.py new file mode 100644 index 00000000..37b8a6e0 --- /dev/null +++ b/src/local/config_creation/compile_historical_emissions.py @@ -0,0 +1,16 @@ +""" +Historical emissions config creation +""" + +from __future__ import annotations + +from pathlib import Path + +from local.config.compile_historical_emissions import CompileHistoricalEmissionsConfig + +COMPILE_HISTORICAL_EMISSIONS_STEPS = [ + CompileHistoricalEmissionsConfig( + step_config_id="only", + complete_historical_emissions_file=Path("data/processed/historical_emissions"), + ) +] diff --git a/src/local/config_creation/monthly_fifteen_degree_pieces.py b/src/local/config_creation/monthly_fifteen_degree_pieces.py index 0887c5b9..446db438 100644 --- a/src/local/config_creation/monthly_fifteen_degree_pieces.py +++ b/src/local/config_creation/monthly_fifteen_degree_pieces.py @@ -6,6 +6,8 @@ from pathlib import Path +import pint + from local.config.calculate_ch4_monthly_15_degree import ( CalculateCH4MonthlyFifteenDegreePieces, ) @@ -15,11 +17,19 @@ from local.config.calculate_n2o_monthly_15_degree import ( CalculateN2OMonthlyFifteenDegreePieces, ) +from local.config.calculate_sf6_like_monthly_15_degree import ( + CalculateSF6LikeMonthlyFifteenDegreePieces, + SF6LikePreIndustrialConfig, +) + +Q = pint.get_application_registry().Quantity # type: ignore + PieceCalculationOption = ( CalculateCH4MonthlyFifteenDegreePieces | CalculateCO2MonthlyFifteenDegreePieces | CalculateN2OMonthlyFifteenDegreePieces + | CalculateSF6LikeMonthlyFifteenDegreePieces ) @@ -57,6 +67,14 @@ def create_monthly_fifteen_degree_pieces_configs( get_n2o_monthly_fifteen_degree_pieces_config() ] + elif gas in ("sf6", "cfc11", "cfc12", "hfc134a"): + if "calculate_sf6_like_monthly_fifteen_degree_pieces" not in out: + out["calculate_sf6_like_monthly_fifteen_degree_pieces"] = [] + + out["calculate_sf6_like_monthly_fifteen_degree_pieces"].append( + get_sf6_like_monthly_fifteen_degree_pieces_config(gas=gas) + ) + else: raise NotImplementedError(gas) @@ -191,3 +209,80 @@ def get_co2_monthly_fifteen_degree_pieces_config() -> ( latitudinal_gradient_fifteen_degree_allyears_monthly_file=interim_dir / "co2_latitudinal-gradient_fifteen-degree_allyears-monthly.nc", ) + + +PRE_INDUSTRIAL_VALUES_DEFAULT = { + "sf6": SF6LikePreIndustrialConfig( + value=Q(0.0, "ppt"), year=1950, source="Guessing from reading M2017" + ), + "cfc11": SF6LikePreIndustrialConfig( + value=Q(0.0, "ppt"), year=1950, source="Guessing from reading M2017" + ), + "cfc12": SF6LikePreIndustrialConfig( + value=Q(0.0, "ppt"), year=1940, source="Guessing from reading M2017" + ), + "hfc134a": SF6LikePreIndustrialConfig( + value=Q(0.0, "ppt"), year=1990, source="Guessing from reading M2017" + ), +} +"""Default values to use 
for pre-industrial""" + + +def get_sf6_like_monthly_fifteen_degree_pieces_config( + gas: str, + pre_industrial: SF6LikePreIndustrialConfig | None = None, + allow_poleward_extension: bool = True, +) -> CalculateSF6LikeMonthlyFifteenDegreePieces: + """ + Get the configuration for calculating the monthly, 15 degree pieces for a gas handled like SF6 + + Parameters + ---------- + gas + Gas for which to create the config + + pre_industrial + Pre-industrial value. + If not supplied, we use the value from {py:const}`PRE_INDUSTRIAL_VALUES_DEFAULT` + for ``gas``. + + Returns + ------- + Configuration for calculating the monthly, 15 degree pieces for a gas handled like SF6 + """ + interim_dir = Path(f"data/interim/{gas}") + + if pre_industrial is None: + pre_industrial = PRE_INDUSTRIAL_VALUES_DEFAULT[gas] + + return CalculateSF6LikeMonthlyFifteenDegreePieces( + step_config_id=gas, + gas=gas, + pre_industrial=pre_industrial, + processed_bin_averages_file=interim_dir + / f"{gas}_observational-network_bin-averages.csv", + processed_all_data_with_bins_file=interim_dir + / f"{gas}_observational-network_all-data-with-bin-information.csv", + allow_poleward_extension=allow_poleward_extension, + observational_network_interpolated_file=interim_dir + / f"{gas}_observational-network_interpolated.nc", + observational_network_global_annual_mean_file=interim_dir + / f"{gas}_observational-network_global-annual-mean.nc", + lat_gradient_n_eofs_to_use=1, + observational_network_latitudinal_gradient_eofs_file=interim_dir + / f"{gas}_observational-network_latitudinal-gradient-eofs.nc", + observational_network_seasonality_file=interim_dir + / f"{gas}_observational-network_seasonality.nc", + latitudinal_gradient_allyears_pcs_eofs_file=interim_dir + / f"{gas}_allyears-lat-gradient-eofs-pcs.nc", + latitudinal_gradient_pc0_total_emissions_regression_file=interim_dir + / f"{gas}_pc0-total-emissions-regression.yaml", + global_annual_mean_allyears_file=interim_dir + / f"{gas}_global-annual-mean_allyears.nc", + global_annual_mean_allyears_monthly_file=interim_dir + / f"{gas}_global-annual-mean_allyears-monthly.nc", + seasonality_allyears_fifteen_degree_monthly_file=interim_dir + / f"{gas}_seasonality_fifteen-degree_allyears-monthly.nc", + latitudinal_gradient_fifteen_degree_allyears_monthly_file=interim_dir + / f"{gas}_latitudinal-gradient_fifteen-degree_allyears-monthly.nc", + ) diff --git a/src/local/config_creation/noaa_handling.py b/src/local/config_creation/noaa_handling.py index bf88cba2..bb78756a 100644 --- a/src/local/config_creation/noaa_handling.py +++ b/src/local/config_creation/noaa_handling.py @@ -16,10 +16,44 @@ ProcessNOAASurfaceFlaskDataConfig, ) from local.config.retrieve_and_extract_noaa import RetrieveExtractNOAADataConfig +from local.noaa_processing import HATS_GAS_NAME_MAPPING IN_SITU_URL_BASE = "https://gml.noaa.gov/aftp/data/greenhouse_gases/{gas}/in-situ/surface/{gas}_surface-insitu_ccgg_text.zip" SURFACE_FLASK_URL_BASE = "https://gml.noaa.gov/aftp/data/trace_gases/{gas}/flask/surface/{gas}_surface-flask_ccgg_text.zip" + +def get_hats_url(gas: str) -> str: + """ + Get URL for downloading from NOAA HATs + + Parameters + ---------- + gas + Gas for which to get the URL + + Returns + ------- + URL from which to download the combined data + """ + if "cfc" in gas or "hfc" in gas: + if gas in HATS_GAS_NAME_MAPPING: + gas_hats = HATS_GAS_NAME_MAPPING[gas] + else: + gas_hats = gas + + if "cfc" in gas: + res = f"https://gml.noaa.gov/aftp/data/hats/cfcs/{gas.lower()}/combined/HATS_global_{gas_hats}.txt" + elif "hfc" 
in gas: + res = f"https://gml.noaa.gov/aftp/data/hats/hfcs/{gas_hats.lower()}_GCMS_flask.txt" + else: + raise NotImplementedError(gas) + + else: + res = f"https://gml.noaa.gov/aftp/data/hats/{gas.lower()}/combined/GML_global_{gas.upper()}.txt" + + return res + + DOWNLOAD_URLS = { ("co2", "surface-flask"): [ URLSource( @@ -30,13 +64,13 @@ ("co2", "in-situ"): [ URLSource( url=IN_SITU_URL_BASE.format(gas="co2"), - known_hash="0a68c9716bb9ec29e23966a2394e312618ed9b822885876d1ce5517bdf70acbe", + known_hash="f06a7eb8f8e56f775e4843b889ba4388ad61557c95b0d8a764522893f2d90bc1", ) ], ("ch4", "in-situ"): [ URLSource( url=IN_SITU_URL_BASE.format(gas="ch4"), - known_hash="c8ad74288d860c63b6a027df4d7bf6742e772fc4e3f99a4052607a382d7fefb2", + known_hash="0ac02608ec33f6057e496463e2cf755970d40f4e653a6a5a7d6d14ec519b863e", ) ], ("ch4", "surface-flask"): [ @@ -45,25 +79,49 @@ known_hash="e541578315328857f01eb7432b5949e39beabab2017c09e46727ac49ec728087", ) ], + ("n2o", "hats"): [ + URLSource( + url=get_hats_url("n2o"), + known_hash="d05fb01d87185d5020ca35a30ae40cc9c70fcc7d1e9d0640e43f09df9e568f1a", + ) + ], ("n2o", "surface-flask"): [ URLSource( url=SURFACE_FLASK_URL_BASE.format(gas="n2o"), known_hash="6b7e09c37b7fa456ab170a4c7b825b3d4b9f6eafb0ff61a2a46554b0e63e84b1", ) ], - ("n2o", "hats"): [ + ("sf6", "hats"): [ URLSource( - url="https://gml.noaa.gov/aftp/data/hats/n2o/combined/GML_global_N2O.txt", - known_hash="d05fb01d87185d5020ca35a30ae40cc9c70fcc7d1e9d0640e43f09df9e568f1a", + url=get_hats_url("sf6"), + known_hash="822543e2558e9e22e943478d37dffe0c758091c35d1ff9bf2b2697507dd3b39d", + ) + ], + ("sf6", "surface-flask"): [ + URLSource( + url=SURFACE_FLASK_URL_BASE.format(gas="sf6"), + known_hash="376c78456bba6844cca78ecd812b896eb2f10cc6b8a9bf6cad7a52dc39e31e9a", + ) + ], + ("cfc11", "hats"): [ + URLSource( + url=get_hats_url("cfc11"), + known_hash="c6067e98bf3896a45e21a248155bbf07815facce2c428bf015560602f31661f9", + ) + ], + ("cfc12", "hats"): [ + URLSource( + url=get_hats_url("cfc12"), + known_hash="2537e02a6c4fc880c15db6ddf7ff0037add7e3f55fb227523e24ca16363128e0", + ) + ], + ("hfc134a", "hats"): [ + URLSource( + url=get_hats_url("hfc134a"), + known_hash="b4d7c2b760d13e2fe9f720b063dfec2b00f6ece65094d4a2e970bd53280a55a5", ) ], } -# ( -# "sf6", -# "surface-flask", -# "376c78456bba6844cca78ecd812b896eb2f10cc6b8a9bf6cad7a52dc39e31e9a", -# ), -# ) class NOAAHandlingPieces(TypedDict): diff --git a/src/local/config_creation/retrieve_misc_data.py b/src/local/config_creation/retrieve_misc_data.py index e9937559..744ffd34 100644 --- a/src/local/config_creation/retrieve_misc_data.py +++ b/src/local/config_creation/retrieve_misc_data.py @@ -40,7 +40,7 @@ download_url=URLSource( # Use the analysis time series, rather than non-infilled url="https://www.metoffice.gov.uk/hadobs/hadcrut5/data/HadCRUT.5.0.2.0/analysis/diagnostics/HadCRUT.5.0.2.0.analysis.summary_series.global.annual.nc", - known_hash="1063dbe93d7b486464c4c3b06466cd2a4a836638249c1552619a40cda2d78298", + known_hash="c1e6b0b6b372a428adea4fac109eca0278acf857ace4da0f43221fd0379ea353", ), ), ) diff --git a/src/local/global_mean_extension.py b/src/local/global_mean_extension.py new file mode 100644 index 00000000..ac053b65 --- /dev/null +++ b/src/local/global_mean_extension.py @@ -0,0 +1,28 @@ +""" +Global-mean extension handling +""" + +from __future__ import annotations + +from pathlib import Path + +from local.config.base import Config + + +def get_global_mean_supplement_files(gas: str, config: Config) -> list[Path]: + """ + Get global-mean supplement files for a 
given gas + + Parameters + ---------- + gas + Gas + + config + Configuration instance to use for this retrieval + + Returns + ------- + Global-mean supplement files + """ + return [] diff --git a/src/local/mean_preserving_interpolation.py b/src/local/mean_preserving_interpolation.py index e8722510..6fb6ab19 100644 --- a/src/local/mean_preserving_interpolation.py +++ b/src/local/mean_preserving_interpolation.py @@ -25,7 +25,7 @@ def interpolate_annual_mean_to_monthly( annual_mean: xr.DataArray, degrees_freedom_scalar: float = 1.1, rtol: float = 1e-8, - atol: float = 0.0, + atol: float = 1e-6, ) -> xr.DataArray: """ Interpolate annual-mean values to monthly values. @@ -47,6 +47,7 @@ def interpolate_annual_mean_to_monthly( atol Absolute tolerance to apply while checking the interpolation preserved the annual-mean. + This should line up with the tolerance that is used by {py:func}`mean_preserving_interpolation`. Returns ------- @@ -125,6 +126,7 @@ def interpolator( def interpolate_lat_15_degree_to_half_degree( lat_15_degree: xr.DataArray, degrees_freedom_scalar: float = 1.75, + atol: float = 1e-6, ) -> xr.DataArray: """ Interpolate data on a 15 degree latitudinal grid to a 0.5 degree latitudinal grid. @@ -137,6 +139,11 @@ def interpolate_lat_15_degree_to_half_degree( degrees_freedom_scalar Degrees of freedom to use in the interpolation + atol + Absolute tolerance for checking consistency with input data, + and whether the input data is zero. + This should line up with the tolerance that is used by {py:func}`mean_preserving_interpolation`. + Returns ------- Data interpolated onto a 0.5 degree latitudinal grid. @@ -161,29 +168,35 @@ def interpolate_lat_15_degree_to_half_degree( TARGET_LAT_SPACING, ) - # Assume that each value just applies to a point - # (no area extent/interpolation of the data, - # i.e. we're calculating a weighted sum, not an integral). - # Hence use cos here. - weights = np.cos(np.deg2rad(x)) - - coefficients, intercept, knots, degree = mean_preserving_interpolation( - X=X, - Y=Y, - x=x, - weights=weights, - degrees_freedom_scalar=degrees_freedom_scalar, - ) - - def interpolator( - x: float | int | npt.NDArray[np.float64], - ) -> pint.UnitRegistry.Quantity: - return Quantity( # type: ignore - scipy.interpolate.BSpline(t=knots, c=coefficients, k=degree)(x) + intercept, - lat_15_degree.data.units, + if np.isclose(Y, 0.0, atol=atol).all(): + # Short-cut + y = Quantity(np.zeros_like(x), lat_15_degree.data.units) + + else: + # Assume that each value just applies to a point + # (no area extent/interpolation of the data, + # i.e. we're calculating a weighted sum, not an integral). + # Hence use cos here. 
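+        # For example: a point at the equator gets weight cos(0) = 1.0,
+        # a point at 60 degrees latitude gets cos(60 deg) = 0.5 and
+        # points approaching the poles get weights approaching zero,
+        # matching each latitude's share of an area-weighted global mean.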
+ weights = np.cos(np.deg2rad(x)) + + coefficients, intercept, knots, degree = mean_preserving_interpolation( + X=X, + Y=Y, + x=x, + weights=weights, + degrees_freedom_scalar=degrees_freedom_scalar, ) - y = interpolator(x) + def interpolator( + x: float | int | npt.NDArray[np.float64], + ) -> pint.UnitRegistry.Quantity: + return Quantity( # type: ignore + scipy.interpolate.BSpline(t=knots, c=coefficients, k=degree)(x) + + intercept, + lat_15_degree.data.units, + ) + + y = interpolator(x) out = xr.DataArray( name="fine_grid", @@ -197,6 +210,7 @@ def interpolator( .apply(calculate_global_mean_from_lon_mean) .data.squeeze(), lat_15_degree.data.squeeze(), + atol=atol, ) return out diff --git a/src/local/noaa_processing.py b/src/local/noaa_processing.py index 4670858c..1ff2442a 100644 --- a/src/local/noaa_processing.py +++ b/src/local/noaa_processing.py @@ -38,6 +38,30 @@ """Mapping from NOAA units to convention we use""" +HATS_GAS_NAME_MAPPING: dict[str, str] = {"cfc11": "F11", "cfc12": "F12"} +"""Mapping from HATS names for gases to our names""" + +HATS_GAS_NAME_MAPPING_REVERSE = {v: k for k, v in HATS_GAS_NAME_MAPPING.items()} +HATS_ASSUMED_LOCATION: dict[str, dict[str, float]] = { + "alt": {"latitude": 82.5, "longitude": -62.3}, + "sum": {"latitude": 72.6, "longitude": -38.4}, + "brw": {"latitude": 71.3, "longitude": -156.6}, + "mhd": {"latitude": 53.0, "longitude": -10.0}, + "thd": {"latitude": 41.0, "longitude": -124.0}, + "nwr": {"latitude": 40.052, "longitude": -105.585}, + "kum": {"latitude": 19.5, "longitude": -154.8}, + "mlo": {"latitude": 19.5, "longitude": -155.6}, + # Guessing that this is Mauna Loa's emergency site... + "mlo_pfp": {"latitude": 19.823, "longitude": -155.469}, + "smo": {"latitude": -14.3, "longitude": -170.6}, + "cgo": {"latitude": -40.7, "longitude": 144.8}, + "psa": {"latitude": -64.6, "longitude": -64.0}, + "spo": {"latitude": -90.0, "longitude": 0.0}, + "hfm": {"latitude": 42.5, "longitude": -72.2}, + "lef": {"latitude": 45.6, "longitude": -90.27}, +} + + def get_metadata_from_filename_default(filename: str) -> dict[str, str]: """ Get metadata from filename - default implementation @@ -179,7 +203,7 @@ def read_data_incl_datetime( comment="#", date_format={}, parse_dates=datetime_columns, - delim_whitespace=True, + sep=r"\s+", ) df_events["unit"] = units @@ -237,7 +261,7 @@ def read_flask_monthly_data( header=None, names=data_fields, comment="#", - delim_whitespace=True, + sep=r"\s+", converters={"site": str}, # keep '000' as string ) @@ -426,8 +450,13 @@ def read_noaa_in_situ_zip( return df_months -def read_noaa_hats( # noqa: PLR0912, PLR0915 - infile: Path, gas: str, source: str, skiprows: int = 67, sep: str = r"\s+" +def read_noaa_hats( # noqa: PLR0913 + infile: Path, + gas: str, + source: str, + sep: str = r"\s+", + unit_assumed: str = "ppt", + time_col_assumed: str = "yyyymmdd", ) -> pd.DataFrame: """ Read NOAA HATS data file @@ -443,8 +472,86 @@ def read_noaa_hats( # noqa: PLR0912, PLR0915 source Source of the file - skiprows - Number of rows to skip when reading the file + sep + Separator to assume when reading the file + + unit_assumed + Assumed unit of the data + + time_col_assumed + Assumed time column of the data + + Returns + ------- + Read data + """ + with open(infile) as fh: + file_content = fh.read() + + gas_file = infile.stem.split("_")[0] + + if gas_file in HATS_GAS_NAME_MAPPING_REVERSE: + gas_file_mapped = HATS_GAS_NAME_MAPPING_REVERSE[gas_file] + + else: + gas_file_mapped = gas_file + + gas_file_mapped = gas_file_mapped.lower() + + if gas != 
gas_file_mapped: + msg = f"{gas=}, {gas_file=}, {gas_file_mapped=}" + raise AssertionError(msg) + + res = pd.read_csv(StringIO(file_content), skiprows=1, sep=sep) + res["year"] = res[time_col_assumed].astype(str).apply(lambda x: x[:4]).astype(int) + res["month"] = res[time_col_assumed].astype(str).apply(lambda x: x[4:6]).astype(int) + res["value"] = res[gas] + res["unit"] = unit_assumed + res["gas"] = gas + res["source"] = "hats" + res["latitude"] = res["site"].apply(lambda x: HATS_ASSUMED_LOCATION[x]["latitude"]) + res["longitude"] = res["site"].apply( + lambda x: HATS_ASSUMED_LOCATION[x]["longitude"] + ) + res = res.rename({"site": "site_code"}, axis="columns") + res = res[ + [ + "year", + "month", + "value", + "site_code", + "latitude", + "longitude", + "gas", + "source", + "unit", + ] + ] + + # Take average where there is more than one observation in a month. + # Bit annoying that NOAA doesn't have a monthly file, oh well. + all_except_value = list(set(res.columns) - {"value"}) + res = res.groupby(all_except_value)["value"].mean().to_frame("value").reset_index() + + return res + + +def read_noaa_hats_combined( # noqa: PLR0912, PLR0915 + infile: Path, gas: str, source: str, sep: str = r"\s+" +) -> pd.DataFrame: + """ + Read NOAA HATS data file + + Parameters + ---------- + infile + File to read + + gas + Gas we assume is in the file + + source + Source of the file sep Separator to assume when reading the file @@ -456,10 +563,18 @@ def read_noaa_hats( # noqa: PLR0912, PLR0915 with open(infile) as fh: file_content = fh.read() - gas_file = infile.stem.split("_")[2].lower() + gas_file = infile.stem.split("_")[2] + + if gas_file in HATS_GAS_NAME_MAPPING_REVERSE: + gas_file_mapped = HATS_GAS_NAME_MAPPING_REVERSE[gas_file] + + else: + gas_file_mapped = gas_file + + gas_file_mapped = gas_file_mapped.lower() - if gas != gas_file: - msg = f"{gas=}, {gas_file=}" + if gas != gas_file_mapped: + msg = f"{gas=}, {gas_file=}, {gas_file_mapped=}" raise AssertionError(msg) try: @@ -475,7 +590,7 @@ def read_noaa_hats( # noqa: PLR0912, PLR0915 if unit.endswith("."): unit = unit[:-1] - tmp = pd.read_csv(StringIO(file_content), skiprows=skiprows, sep=sep) + tmp = pd.read_csv(StringIO(file_content), comment="#", sep=sep) year_col = tmp.columns[0] if not year_col.endswith("YYYY"): @@ -493,7 +608,7 @@ def read_noaa_hats( # noqa: PLR0912, PLR0915 c = cast(str, c) if ( c.endswith("sd") - or not c.endswith(gas.upper()) + or not c.endswith(gas_file.upper()) or any(v in c for v in ("NH", "SH", "Global")) ): continue diff --git a/src/local/notebook_steps/calculate_n2o_monthly_fifteen_degree_pieces.py b/src/local/notebook_steps/calculate_n2o_monthly_fifteen_degree_pieces.py index 6117fd10..ec21905d 100644 --- a/src/local/notebook_steps/calculate_n2o_monthly_fifteen_degree_pieces.py +++ b/src/local/notebook_steps/calculate_n2o_monthly_fifteen_degree_pieces.py @@ -51,11 +51,12 @@ def configure_notebooks( config=config, step=step_name, step_config_id=step_config_id ) - config_process_noaa_surface_flask_data = get_config_for_step_id( - config=config, - step="process_noaa_surface_flask_data", - step_config_id=config_step.gas, - ) + # Don't use as already in HATS data + # config_process_noaa_surface_flask_data = get_config_for_step_id( + # config=config, + # step="process_noaa_surface_flask_data", + # step_config_id=config_step.gas, + # ) config_process_noaa_hats_data = get_config_for_step_id( config=config, step="process_noaa_hats_data", @@ -89,7 +90,6 @@ def configure_notebooks( ], configuration=(), dependencies=( - 
config_process_noaa_surface_flask_data.processed_monthly_data_with_loc_file, config_process_noaa_hats_data.processed_monthly_data_with_loc_file, config_process_agage_data_gc_md.processed_monthly_data_with_loc_file, config_process_ale_data.processed_monthly_data_with_loc_file, diff --git a/src/local/notebook_steps/calculate_sf6_like_monthly_fifteen_degree_pieces.py b/src/local/notebook_steps/calculate_sf6_like_monthly_fifteen_degree_pieces.py new file mode 100644 index 00000000..00994440 --- /dev/null +++ b/src/local/notebook_steps/calculate_sf6_like_monthly_fifteen_degree_pieces.py @@ -0,0 +1,218 @@ +""" +Calculate SF6-like gases monthly 15 degree pieces notebook steps +""" + +from __future__ import annotations + +from collections.abc import Iterable +from pathlib import Path +from typing import TYPE_CHECKING + +from pydoit_nb.config_handling import get_config_for_step_id +from pydoit_nb.notebook import ConfiguredNotebook, UnconfiguredNotebook +from pydoit_nb.notebook_step import UnconfiguredNotebookBasedStep + +from local.observational_network_binning import get_obs_network_binning_input_files + +if TYPE_CHECKING: + from ..config.base import Config, ConfigBundle + + +def configure_notebooks( + unconfigured_notebooks: Iterable[UnconfiguredNotebook], + config_bundle: ConfigBundle, + step_name: str, + step_config_id: str, +) -> list[ConfiguredNotebook]: + """ + Configure notebooks + + Parameters + ---------- + unconfigured_notebooks + Unconfigured notebooks + + config_bundle + Configuration bundle from which to take configuration values + + step_name + Name of the step + + step_config_id + Step config ID to use when configuring the notebook + + Returns + ------- + Configured notebooks + """ + uc_nbs_dict = {nb.notebook_path: nb for nb in unconfigured_notebooks} + + config = config_bundle.config_hydrated + + config_step = get_config_for_step_id( + config=config, step=step_name, step_config_id=step_config_id + ) + + obs_network_input_files = get_obs_network_binning_input_files( + gas=config_step.gas, config=config + ) + + config_historical_emissions = get_config_for_step_id( + config=config, step="compile_historical_emissions", step_config_id="only" + ) + # config_retrieve_misc = get_config_for_step_id( + # config=config, step="retrieve_misc_data", step_config_id="only" + # ) + + global_mean_supplement_files = get_obs_network_binning_input_files( + gas=config_step.gas, config=config + ) + + configured_notebooks = [ + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("13yy_sf6-like-monthly-15-degree") + / "1300_sf6-like_bin-observational-network" + ], + configuration=(), + dependencies=tuple(obs_network_input_files), + targets=(config_step.processed_bin_averages_file,), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("13yy_sf6-like-monthly-15-degree") + / "1301_sf6-like_interpolate-observational-network" + ], + configuration=(), + dependencies=(config_step.processed_bin_averages_file,), + targets=(config_step.observational_network_interpolated_file,), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("13yy_sf6-like-monthly-15-degree") + / "1302_sf6-like_observational-network-global-mean-latitudinal-gradient-seasonality" + ], + configuration=(), + dependencies=(config_step.observational_network_interpolated_file,), + targets=( + 
config_step.observational_network_global_annual_mean_file, + config_step.observational_network_latitudinal_gradient_eofs_file, + config_step.observational_network_seasonality_file, + ), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("13yy_sf6-like-monthly-15-degree") + / "1303_sf6-like_extend-lat-gradient-pcs" + ], + configuration=(), + dependencies=( + config_step.observational_network_global_annual_mean_file, + config_step.observational_network_latitudinal_gradient_eofs_file, + config_historical_emissions.complete_historical_emissions_file, + ), + targets=( + config_step.latitudinal_gradient_allyears_pcs_eofs_file, + config_step.latitudinal_gradient_pc0_total_emissions_regression_file, + ), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("13yy_sf6-like-monthly-15-degree") + / "1304_sf6-like_extend-global-annual-mean" + ], + configuration=(), + dependencies=( + config_step.observational_network_global_annual_mean_file, + config_step.latitudinal_gradient_allyears_pcs_eofs_file, + *global_mean_supplement_files, + ), + targets=(config_step.global_annual_mean_allyears_file,), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("13yy_sf6-like-monthly-15-degree") + / "1305_sf6-like_create-pieces-for-gridding" + ], + configuration=(), + dependencies=( + config_step.global_annual_mean_allyears_file, + config_step.observational_network_seasonality_file, + config_step.latitudinal_gradient_allyears_pcs_eofs_file, + ), + targets=( + config_step.global_annual_mean_allyears_monthly_file, + config_step.seasonality_allyears_fifteen_degree_monthly_file, + config_step.latitudinal_gradient_fifteen_degree_allyears_monthly_file, + ), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ] + + return configured_notebooks + + +step: UnconfiguredNotebookBasedStep[ + Config, ConfigBundle +] = UnconfiguredNotebookBasedStep( + step_name="calculate_sf6_like_monthly_fifteen_degree_pieces", + unconfigured_notebooks=[ + UnconfiguredNotebook( + notebook_path=Path("13yy_sf6-like-monthly-15-degree") + / "1300_sf6-like_bin-observational-network", + raw_notebook_ext=".py", + summary="SF6-like gas pieces - Bin observational data", + doc="Bin the observational data for SF6-like gases.", + ), + UnconfiguredNotebook( + notebook_path=Path("13yy_sf6-like-monthly-15-degree") + / "1301_sf6-like_interpolate-observational-network", + raw_notebook_ext=".py", + summary="SF6-like gas pieces - Interpolate observational network onto our 15 deg x 60 deg grid", + doc="Interpolate the observational data for SF6-like gases.", + ), + UnconfiguredNotebook( + notebook_path=Path("13yy_sf6-like-monthly-15-degree") + / "1302_sf6-like_observational-network-global-mean-latitudinal-gradient-seasonality", + raw_notebook_ext=".py", + summary="SF6-like gas pieces - Observational network pieces", + doc="Calculate latitudinal gradient, seasonality and global-mean from the observational network", + ), + UnconfiguredNotebook( + notebook_path=Path("13yy_sf6-like-monthly-15-degree") + / "1303_sf6-like_extend-lat-gradient-pcs", + raw_notebook_ext=".py", + summary="SF6-like gas pieces - Extend PCs over the entire time period", + doc="Extend the principal components (PCs) over the entire time period of interest", + ), 
+ UnconfiguredNotebook( + notebook_path=Path("13yy_sf6-like-monthly-15-degree") + / "1304_sf6-like_extend-global-annual-mean", + raw_notebook_ext=".py", + summary="SF6-like gas pieces - Extend global, annual-mean over the entire time period", + doc=( + "Extend the global, annual-mean over the entire time period of interest " + "using other data sources and our latitudinal gradient." + ), + ), + UnconfiguredNotebook( + notebook_path=Path("13yy_sf6-like-monthly-15-degree") + / "1305_sf6-like_create-pieces-for-gridding", + raw_notebook_ext=".py", + summary="SF6-like gas pieces - Finalise the pieces for gridding", + doc="Finalise the pieces required for creating gridded files", + ), + ], + configure_notebooks=configure_notebooks, +) diff --git a/src/local/notebook_steps/compile_historical_emissions.py b/src/local/notebook_steps/compile_historical_emissions.py new file mode 100644 index 00000000..1f6b503c --- /dev/null +++ b/src/local/notebook_steps/compile_historical_emissions.py @@ -0,0 +1,85 @@ +""" +Calculate historical emissions notebook steps +""" + +from __future__ import annotations + +from collections.abc import Iterable +from pathlib import Path +from typing import TYPE_CHECKING + +from pydoit_nb.config_handling import get_config_for_step_id +from pydoit_nb.notebook import ConfiguredNotebook, UnconfiguredNotebook +from pydoit_nb.notebook_step import UnconfiguredNotebookBasedStep + +if TYPE_CHECKING: + from ..config.base import Config, ConfigBundle + + +def configure_notebooks( + unconfigured_notebooks: Iterable[UnconfiguredNotebook], + config_bundle: ConfigBundle, + step_name: str, + step_config_id: str, +) -> list[ConfiguredNotebook]: + """ + Configure notebooks + + Parameters + ---------- + unconfigured_notebooks + Unconfigured notebooks + + config_bundle + Configuration bundle from which to take configuration values + + step_name + Name of the step + + step_config_id + Step config ID to use when configuring the notebook + + Returns + ------- + Configured notebooks + """ + uc_nbs_dict = {nb.notebook_path: nb for nb in unconfigured_notebooks} + + config = config_bundle.config_hydrated + + config_step = get_config_for_step_id( + config=config, step=step_name, step_config_id=step_config_id + ) + + configured_notebooks = [ + ConfiguredNotebook( + unconfigured_notebook=uc_nbs_dict[ + Path("010y_compile-historical-emissions") + / "0109_compile-complete-dataset" + ], + configuration=(), + dependencies=(), + targets=(config_step.complete_historical_emissions_file,), + config_file=config_bundle.config_hydrated_path, + step_config_id=step_config_id, + ), + ] + + return configured_notebooks + + +step: UnconfiguredNotebookBasedStep[ + Config, ConfigBundle +] = UnconfiguredNotebookBasedStep( + step_name="compile_historical_emissions", + unconfigured_notebooks=[ + UnconfiguredNotebook( + notebook_path=Path("010y_compile-historical-emissions") + / "0109_compile-complete-dataset", + raw_notebook_ext=".py", + summary="Compile historical emissions - create complete dataset", + doc="Compile a complete historical emissions dataset from our various sources.", + ), + ], + configure_notebooks=configure_notebooks, +) diff --git a/src/local/notebook_steps/crunch_grids.py b/src/local/notebook_steps/crunch_grids.py index c1946126..ef6c57db 100644 --- a/src/local/notebook_steps/crunch_grids.py +++ b/src/local/notebook_steps/crunch_grids.py @@ -51,10 +51,18 @@ def configure_notebooks( config=config, step=step_name, step_config_id=step_config_id ) + if config_step.gas in ("co2", "ch4", "n2o"): + step = 
f"calculate_{config_step.gas}_monthly_fifteen_degree_pieces" + step_config_id_gridding_pieces_step = "only" + + else: + step = "calculate_sf6_like_monthly_fifteen_degree_pieces" + step_config_id_gridding_pieces_step = config_step.gas + config_gridding_pieces_step = get_config_for_step_id( config=config, - step=f"calculate_{config_step.gas}_monthly_fifteen_degree_pieces", - step_config_id="only", + step=step, + step_config_id=step_config_id_gridding_pieces_step, ) configured_notebooks = [ diff --git a/src/local/observational_network_binning.py b/src/local/observational_network_binning.py new file mode 100644 index 00000000..a80a66f8 --- /dev/null +++ b/src/local/observational_network_binning.py @@ -0,0 +1,129 @@ +""" +Observational network binning functionality +""" + +from __future__ import annotations + +from pathlib import Path + +from pydoit_nb.config_handling import get_config_for_step_id + +from local.config.base import Config + + +def get_obs_network_binning_input_files(gas: str, config: Config) -> list[Path]: + """ + Get the input files to use for binning the observational network + + Parameters + ---------- + gas + Gas for which to retrieve the input files + + config + Configuration instance to use for this retrieval + + Returns + ------- + Input files to use for binning the observational network + """ + if gas in ("sf6",): + return get_input_files_sf6_like(gas=gas, config=config) + + if gas in ("cfc11", "cfc12"): + return get_input_files_cfc11_like(gas=gas, config=config) + + if gas in ("hfc134a",): + return get_input_files_hfc134a_like(gas=gas, config=config) + + raise NotImplementedError(gas) + + +def get_input_files_sf6_like(gas: str, config: Config) -> list[Path]: + """ + Get the input files to use for binning the observational network for gases we handle like SF6 + """ + # # Don't use SF6 surface flask to avoid double counting + # config_process_noaa_surface_flask_data = get_config_for_step_id( + # config=config, + # step="process_noaa_surface_flask_data", + # step_config_id=gas, + # ) + config_process_noaa_hats_data = get_config_for_step_id( + config=config, + step="process_noaa_hats_data", + step_config_id=gas, + ) + config_process_agage_gc_md_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + step_config_id=f"{gas}_gc-md_monthly", + ) + config_process_agage_gc_ms_medusa_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + step_config_id=f"{gas}_gc-ms-medusa_monthly", + ) + return [ + config_process_noaa_hats_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_md_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_ms_medusa_data.processed_monthly_data_with_loc_file, + ] + + +def get_input_files_cfc11_like(gas: str, config: Config) -> list[Path]: + """ + Get the input files to use for binning the observational network for gases we handle like CFC-11 + """ + config_process_noaa_hats_data = get_config_for_step_id( + config=config, + step="process_noaa_hats_data", + step_config_id=gas, + ) + config_process_agage_gc_md_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + step_config_id=f"{gas}_gc-md_monthly", + ) + config_process_agage_gc_ms_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + step_config_id=f"{gas}_gc-ms_monthly", + ) + config_process_agage_gc_ms_medusa_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + 
step_config_id=f"{gas}_gc-ms-medusa_monthly", + ) + return [ + config_process_noaa_hats_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_md_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_ms_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_ms_medusa_data.processed_monthly_data_with_loc_file, + ] + + +def get_input_files_hfc134a_like(gas: str, config: Config) -> list[Path]: + """ + Get the input files to use for binning the observational network for gases we handle like HFC-134a + """ + config_process_noaa_hats_data = get_config_for_step_id( + config=config, + step="process_noaa_hats_data", + step_config_id=gas, + ) + config_process_agage_gc_ms_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + step_config_id=f"{gas}_gc-ms_monthly", + ) + config_process_agage_gc_ms_medusa_data = get_config_for_step_id( + config=config, + step="retrieve_and_extract_agage_data", + step_config_id=f"{gas}_gc-ms-medusa_monthly", + ) + return [ + config_process_noaa_hats_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_ms_data.processed_monthly_data_with_loc_file, + config_process_agage_gc_ms_medusa_data.processed_monthly_data_with_loc_file, + ] diff --git a/src/local/seasonality.py b/src/local/seasonality.py index 140c9de1..d3b859a2 100644 --- a/src/local/seasonality.py +++ b/src/local/seasonality.py @@ -8,6 +8,7 @@ import xarray as xr from attrs import define +from local.mean_preserving_interpolation import interpolate_annual_mean_to_monthly from local.regressors import LinearRegressionResult from local.xarray_time import convert_time_to_year_month @@ -15,7 +16,7 @@ def calculate_seasonality( lon_mean: xr.DataArray, global_mean: xr.DataArray, -) -> tuple[xr.DataArray, xr.DataArray]: +) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray]: """ Calculate seasonality """ @@ -25,21 +26,26 @@ def calculate_seasonality( raise AssertionError(msg) lon_mean_ym_annual_mean = lon_mean_ym.mean("month") - lon_mean_ym_monthly_anomalies = lon_mean_ym - lon_mean_ym_annual_mean + lon_mean_ym_annual_mean_monthly = lon_mean_ym_annual_mean.groupby("lat", squeeze=False).apply( # type: ignore + interpolate_annual_mean_to_monthly, + ) + lon_mean_ym_monthly_anomalies = lon_mean_ym - lon_mean_ym_annual_mean_monthly + lon_mean_ym_monthly_anomalies_year_average = lon_mean_ym_monthly_anomalies.mean( "year" ) + seasonality = lon_mean_ym_monthly_anomalies_year_average relative_seasonality = seasonality / global_mean.mean("time") np.testing.assert_allclose( - seasonality.mean("month").pint.dequantify(), 0.0, atol=1e-13 + seasonality.mean("month").pint.dequantify(), 0.0, atol=2e-8 ) np.testing.assert_allclose( - relative_seasonality.sum("month").pint.dequantify(), 0.0, atol=1e-13 + relative_seasonality.sum("month").pint.dequantify(), 0.0, atol=1e-8 ) - return seasonality, relative_seasonality + return seasonality, relative_seasonality, lon_mean_ym_monthly_anomalies def calculate_seasonality_change_eofs_pcs( @@ -51,15 +57,9 @@ def calculate_seasonality_change_eofs_pcs( Super helpful resource: https://www.ess.uci.edu/~yu/class/ess210b/lecture.5.EOF.all.pdf """ - lon_mean_ym = convert_time_to_year_month(lon_mean) - if lon_mean_ym.isnull().any(): - msg = "Drop out any years with nan data before starting" - raise AssertionError(msg) - - lon_mean_ym_annual_mean = lon_mean_ym.mean("month") - lon_mean_ym_monthly_anomalies = lon_mean_ym - lon_mean_ym_annual_mean - - seasonality, _ = calculate_seasonality(lon_mean=lon_mean, 
diff --git a/src/local/seasonality.py b/src/local/seasonality.py
index 140c9de1..d3b859a2 100644
--- a/src/local/seasonality.py
+++ b/src/local/seasonality.py
@@ -8,6 +8,7 @@
 import xarray as xr
 from attrs import define
 
+from local.mean_preserving_interpolation import interpolate_annual_mean_to_monthly
 from local.regressors import LinearRegressionResult
 from local.xarray_time import convert_time_to_year_month
 
@@ -15,7 +16,7 @@ def calculate_seasonality(
     lon_mean: xr.DataArray,
     global_mean: xr.DataArray,
-) -> tuple[xr.DataArray, xr.DataArray]:
+) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray]:
     """
     Calculate seasonality
     """
@@ -25,21 +26,26 @@ def calculate_seasonality(
         raise AssertionError(msg)
 
     lon_mean_ym_annual_mean = lon_mean_ym.mean("month")
-    lon_mean_ym_monthly_anomalies = lon_mean_ym - lon_mean_ym_annual_mean
+    lon_mean_ym_annual_mean_monthly = lon_mean_ym_annual_mean.groupby("lat", squeeze=False).apply(  # type: ignore
+        interpolate_annual_mean_to_monthly,
+    )
+    lon_mean_ym_monthly_anomalies = lon_mean_ym - lon_mean_ym_annual_mean_monthly
+
     lon_mean_ym_monthly_anomalies_year_average = lon_mean_ym_monthly_anomalies.mean(
         "year"
     )
+
     seasonality = lon_mean_ym_monthly_anomalies_year_average
     relative_seasonality = seasonality / global_mean.mean("time")
 
     np.testing.assert_allclose(
-        seasonality.mean("month").pint.dequantify(), 0.0, atol=1e-13
+        seasonality.mean("month").pint.dequantify(), 0.0, atol=2e-8
     )
     np.testing.assert_allclose(
-        relative_seasonality.sum("month").pint.dequantify(), 0.0, atol=1e-13
+        relative_seasonality.sum("month").pint.dequantify(), 0.0, atol=1e-8
     )
 
-    return seasonality, relative_seasonality
+    return seasonality, relative_seasonality, lon_mean_ym_monthly_anomalies
 
 
 def calculate_seasonality_change_eofs_pcs(
@@ -51,15 +57,9 @@
     Super helpful resource:
     https://www.ess.uci.edu/~yu/class/ess210b/lecture.5.EOF.all.pdf
     """
-    lon_mean_ym = convert_time_to_year_month(lon_mean)
-    if lon_mean_ym.isnull().any():
-        msg = "Drop out any years with nan data before starting"
-        raise AssertionError(msg)
-
-    lon_mean_ym_annual_mean = lon_mean_ym.mean("month")
-    lon_mean_ym_monthly_anomalies = lon_mean_ym - lon_mean_ym_annual_mean
-
-    seasonality, _ = calculate_seasonality(lon_mean=lon_mean, global_mean=global_mean)
+    seasonality, _, lon_mean_ym_monthly_anomalies = calculate_seasonality(
+        lon_mean=lon_mean, global_mean=global_mean
+    )
 
     seasonality_anomalies = lon_mean_ym_monthly_anomalies - seasonality
diff --git a/src/local/tasks.py b/src/local/tasks.py
index 0853513c..782c86c8 100644
--- a/src/local/tasks.py
+++ b/src/local/tasks.py
@@ -17,6 +17,8 @@
     calculate_ch4_monthly_fifteen_degree_pieces,
     calculate_co2_monthly_fifteen_degree_pieces,
     calculate_n2o_monthly_fifteen_degree_pieces,
+    calculate_sf6_like_monthly_fifteen_degree_pieces,
+    compile_historical_emissions,
     crunch_grids,
     plot_input_data_overviews,
     process_noaa_hats,
@@ -121,10 +123,12 @@ def gen_all_tasks(
         retrieve_and_process_epica_data,
         retrieve_and_process_neem_data,
         plot_input_data_overviews,
+        compile_historical_emissions,
         smooth_law_dome_data,
         calculate_co2_monthly_fifteen_degree_pieces,
         calculate_ch4_monthly_fifteen_degree_pieces,
         calculate_n2o_monthly_fifteen_degree_pieces,
+        calculate_sf6_like_monthly_fifteen_degree_pieces,
         crunch_grids,
         write_input4mips,
     ]:
diff --git a/src/local/xarray_time.py b/src/local/xarray_time.py
index 75d257ec..e9209896 100644
--- a/src/local/xarray_time.py
+++ b/src/local/xarray_time.py
@@ -75,11 +75,15 @@ def convert_year_month_to_time(
     **kwargs
         Passed to intialiser of :class:`cftime.datetime`
 
+        If not supplied, we use a calendar of "standard".
 
     Returns
     -------
         Data with time axis
     """
+    if not kwargs:
+        kwargs = dict(calendar="standard")
+
     return convert_to_time(
         inp,
         time_coords=("year", "month"),
@@ -110,12 +114,16 @@ def convert_year_to_time(
         Day of the month to assume in output
 
     **kwargs
-        Passed to intialiser of :class:`cftime.datetime`
+        Passed to initialiser of :class:`cftime.datetime`.
+        If not supplied, we use a calendar of "standard".
 
     Returns
     -------
         Data with time axis
     """
+    if not kwargs:
+        kwargs = dict(calendar="standard")
+
     return convert_to_time(
         inp,
         time_coords=("year",),
diff --git a/stubs/cftime.pyi b/stubs/cftime.pyi
index 47e135c3..17416e02 100644
--- a/stubs/cftime.pyi
+++ b/stubs/cftime.pyi
@@ -3,9 +3,13 @@ from __future__ import annotations
 class DatetimeGregorian:
     """Gregorian datetime class"""
 
-    def __init__(self, year: int, month: int, day: int, hour: int = 1) -> None: ...
+    def __init__(
+        self, year: int, month: int, day: int, hour: int = 1, calendar: str = "standard"
+    ) -> None: ...
 
 class datetime:
     """Datetime class"""
 
-    def __init__(self, year: int, month: int, day: int, hour: int = 1) -> None: ...
+    def __init__(
+        self, year: int, month: int, day: int, hour: int = 1, calendar: str = "standard"
+    ) -> None: ...
diff --git a/tests/conftest.py b/tests/conftest.py
index 00c2d2a9..01e65b12 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,6 +1,7 @@
 """
 Configuration file for pytest
 """
+
 from __future__ import annotations
 
 import os
@@ -26,6 +27,8 @@ def basic_workflow_output_info() -> dict[str, str | Path]:
             "doit",
             "run",
             "--verbosity=2",
+            "-n",
+            "6",
         ),
         cwd=REPO_ROOT_DIR,
         env={
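The calendar default added in `xarray_time` matters because `cftime.datetime` is calendar-aware: per the upstream cftime API, the `calendar` keyword selects the date arithmetic used. A minimal illustration of the difference (standard cftime behaviour, not project code):

```python
import cftime

# "standard" (mixed Julian/Gregorian) has a 29 February in 2020 ...
t = cftime.datetime(2020, 2, 29, calendar="standard")
print(t.calendar)  # standard

# ... while a "noleap" calendar does not, so the same arguments are invalid.
try:
    cftime.datetime(2020, 2, 29, calendar="noleap")
except ValueError as exc:
    print(f"noleap rejects 29 Feb: {exc}")
```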
diff --git a/tests/regression/test_workflow.py b/tests/regression/test_workflow.py
index 0305c864..69c35174 100644
--- a/tests/regression/test_workflow.py
+++ b/tests/regression/test_workflow.py
@@ -15,9 +15,10 @@ def test_basic_workflow(basic_workflow_output_info, ndarrays_regression):
     """
     Test the basic workflow
 
-    This workflow's runtime should be kept to a minimum so it can sensibly be
-    used as a test. Less than 30 seconds to run the test is what we should be
-    aiming for.
+    This workflow's runtime should be kept to a minimum
+    so it can sensibly be used as a test.
+    Less than 30 seconds to run the test is what we should be aiming for.
+    Obviously, this is not the case right now.
     """
     array_contents = {}
     for input4mips_file in (
@@ -43,4 +44,6 @@ def test_basic_workflow(basic_workflow_output_info, ndarrays_regression):
             key_write = f"{filepath_write}__{key}"
             array_contents[key_write] = value.data
 
-    ndarrays_regression.check(array_contents)
+    ndarrays_regression.check(
+        array_contents, default_tolerance=dict(atol=1e-6, rtol=1e-3)
+    )
diff --git a/tests/regression/test_workflow/test_basic_workflow.npz b/tests/regression/test_workflow/test_basic_workflow.npz
index 6249e2a6..2cd1d125 100644
Binary files a/tests/regression/test_workflow/test_basic_workflow.npz and b/tests/regression/test_workflow/test_basic_workflow.npz differ
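Making the regression tolerances explicit is sensible given the `.npz` baseline changed in the same commit: floating-point output can wobble across platforms and dependency versions. For reference, `pytest-regressions` applies `default_tolerance` with numpy-isclose-style bounds, roughly |obtained - expected| <= atol + rtol * |expected|, and per-array overrides are available via its `tolerances` argument. A standalone sketch (illustrative test, not project code):

```python
import numpy as np


def test_tolerance_sketch(ndarrays_regression):
    # default_tolerance applies to every array in the dict;
    # pass tolerances={"values": dict(rtol=1e-5)} to tighten a single key.
    ndarrays_regression.check(
        {"values": np.array([1.0, 2.0, 3.0])},
        default_tolerance=dict(atol=1e-6, rtol=1e-3),
    )
```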