From fb67b89ebe2fb06e58fc8f669469ca9124406882 Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Wed, 9 Oct 2024 17:59:27 +0800 Subject: [PATCH 01/10] add cams layer --- city_metrix/layers/cams.py | 60 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 city_metrix/layers/cams.py diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py new file mode 100644 index 0000000..97e7123 --- /dev/null +++ b/city_metrix/layers/cams.py @@ -0,0 +1,60 @@ +import cdsapi +import os +import xarray as xr +import zipfile + +from .layer import Layer + +class Cams(Layer): + def __init__(self, start_date="2023-01-01", end_date="2023-12-01", **kwargs): + super().__init__(**kwargs) + self.start_date = start_date + self.end_date = end_date + + def get_data(self, bbox): + min_lon, min_lat, max_lon, max_lat = bbox + + c = cdsapi.Client() + c.retrieve( + 'cams-global-reanalysis-eac4', + { + 'variable': [ + "2m_temperature", "mean_sea_level_pressure", + "particulate_matter_2.5um", "particulate_matter_10um", + "carbon_monoxide", "nitrogen_dioxide", "ozone", "sulphur_dioxide" + + ], + "model_level": ["60"], + "date": [f"{self.start_date}/{self.end_date}"], + 'time': ['00:00', '03:00', '06:00', '09:00', '12:00', '15:00', '18:00', '21:00'], + 'area': [max_lat, min_lon, min_lat, max_lon], + 'data_format': 'netcdf_zip', + }, + 'cams_download.zip') + + # extract the ZIP file + os.makedirs('cams_download', exist_ok=True) + with zipfile.ZipFile('cams_download.zip', 'r') as zip_ref: + # Extract all the contents of the ZIP file to the specified directory + zip_ref.extractall('cams_download') + + # load netcdf files + dataarray_list = [] + for nc_file in os.listdir('cams_download'): + dataarray = xr.open_dataset(f'cams_download/{nc_file}') + dataarray_list.append(dataarray) + + # not all variables have 'model_level', concatenate without 'model_level' dimension + dataarray_list = [dataarray.squeeze().drop_vars(['model_level', 'latitude', 'longitude'], errors='ignore') for dataarray in dataarray_list] + data = xr.merge(dataarray_list) + # xarray.Dataset to xarray.DataArray + data = data.to_array() + + # Remove local files + os.remove('cams_download.zip') + for nc_file in os.listdir('cams_download'): + os.remove(f'cams_download/{nc_file}') + os.rmdir('cams_download') + + return data + \ No newline at end of file From be26cb773fc7cf846249352d725a46389ee2a832 Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Thu, 17 Oct 2024 19:11:52 +0800 Subject: [PATCH 02/10] reduce model_level dim --- city_metrix/layers/cams.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index 97e7123..856fc78 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -5,6 +5,7 @@ from .layer import Layer + class Cams(Layer): def __init__(self, start_date="2023-01-01", end_date="2023-12-01", **kwargs): super().__init__(**kwargs) @@ -13,7 +14,7 @@ def __init__(self, start_date="2023-01-01", end_date="2023-12-01", **kwargs): def get_data(self, bbox): min_lon, min_lat, max_lon, max_lat = bbox - + c = cdsapi.Client() c.retrieve( 'cams-global-reanalysis-eac4', @@ -45,7 +46,12 @@ def get_data(self, bbox): dataarray_list.append(dataarray) # not all variables have 'model_level', concatenate without 'model_level' dimension - dataarray_list = [dataarray.squeeze().drop_vars(['model_level', 'latitude', 'longitude'], errors='ignore') for dataarray in dataarray_list] + dataarray_list = [ + 
dataarray.squeeze(dim='model_level').drop_vars(['model_level']) + if 'model_level' in dataarray.dims + else dataarray + for dataarray in dataarray_list + ] data = xr.merge(dataarray_list) # xarray.Dataset to xarray.DataArray data = data.to_array() @@ -57,4 +63,3 @@ def get_data(self, bbox): os.rmdir('cams_download') return data - \ No newline at end of file From 3f0415867cc94cc9beecce73509edec6d96d189c Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Fri, 18 Oct 2024 16:33:16 +0800 Subject: [PATCH 03/10] add test and merge xarray dataset --- city_metrix/layers/__init__.py | 1 + city_metrix/layers/cams.py | 7 +++++++ tests/test_layers.py | 6 ++++++ 3 files changed, 14 insertions(+) diff --git a/city_metrix/layers/__init__.py b/city_metrix/layers/__init__.py index e701e52..e365438 100644 --- a/city_metrix/layers/__init__.py +++ b/city_metrix/layers/__init__.py @@ -21,3 +21,4 @@ from .nasa_dem import NasaDEM from .era_5_hottest_day import Era5HottestDay from .impervious_surface import ImperviousSurface +from .cams import Cams diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index 856fc78..f3e3a4c 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -52,6 +52,13 @@ def get_data(self, bbox): else dataarray for dataarray in dataarray_list ] + # drop coordinate ['latitude','longitude'] if uses 360 degree system + dataarray_list = [ + dataarray.drop_vars(['latitude', 'longitude']) + if (dataarray['longitude'].values > 180).any() + else dataarray + for dataarray in dataarray_list + ] data = xr.merge(dataarray_list) # xarray.Dataset to xarray.DataArray data = data.to_array() diff --git a/tests/test_layers.py b/tests/test_layers.py index f8e1fe2..9ea0caf 100644 --- a/tests/test_layers.py +++ b/tests/test_layers.py @@ -5,6 +5,7 @@ AlosDSM, AverageNetBuildingHeight, BuiltUpHeight, + Cams, Era5HottestDay, EsaWorldCover, EsaWorldCoverClass, @@ -48,6 +49,11 @@ def test_built_up_height(): data = BuiltUpHeight().get_data(BBOX) assert np.size(data) > 0 +@pytest.mark.skip(reason="CDS API needs personal access token file to run") +def test_cams(): + data = Cams().get_data(BBOX) + assert np.size(data) > 0 + @pytest.mark.skip(reason="CDS API needs personal access token file to run") def test_era_5_hottest_day(): data = Era5HottestDay().get_data(BBOX) From bd177d63d1a3f2702d8d60debc2ac4ae460abacf Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Wed, 23 Oct 2024 10:03:31 -0400 Subject: [PATCH 04/10] Have __init__ require pollutant species --- city_metrix/layers/cams.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index f3e3a4c..b1fbf08 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -7,30 +7,36 @@ class Cams(Layer): - def __init__(self, start_date="2023-01-01", end_date="2023-12-01", **kwargs): + def __init__(self, start_date="2023-01-01", end_date="2023-12-01", species, **kwargs): super().__init__(**kwargs) self.start_date = start_date self.end_date = end_date + if species in ("particulate_matter_2.5um", "particulate_matter_10um", "carbon_monoxide", "nitrogen_dioxide", "ozone", "sulphur_dioxide"): + self.species = species + else: + raise Exception("Selected species not supported") def get_data(self, bbox): min_lon, min_lat, max_lon, max_lat = bbox c = cdsapi.Client() - c.retrieve( - 'cams-global-reanalysis-eac4', - { - 'variable': [ - "2m_temperature", "mean_sea_level_pressure", - "particulate_matter_2.5um", "particulate_matter_10um", - 
"carbon_monoxide", "nitrogen_dioxide", "ozone", "sulphur_dioxide" - - ], + query_dict = { "model_level": ["60"], "date": [f"{self.start_date}/{self.end_date}"], 'time': ['00:00', '03:00', '06:00', '09:00', '12:00', '15:00', '18:00', '21:00'], 'area': [max_lat, min_lon, min_lat, max_lon], 'data_format': 'netcdf_zip', - }, + } + if self.species in ("particulate_matter_2.5um", "particulate_matter_10um"): + query_dict['variable'] = [self.species] + else: + query_dict['variable'] = [ + "2m_temperature", "mean_sea_level_pressure", self.species + ] + + c.retrieve( + 'cams-global-reanalysis-eac4', + query_dict, 'cams_download.zip') # extract the ZIP file @@ -52,6 +58,7 @@ def get_data(self, bbox): else dataarray for dataarray in dataarray_list ] + # drop coordinate ['latitude','longitude'] if uses 360 degree system dataarray_list = [ dataarray.drop_vars(['latitude', 'longitude']) From 8ca1ecaf487480c2dc4f7cd3404b5d54cc91121b Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Thu, 24 Oct 2024 17:15:12 +0800 Subject: [PATCH 05/10] roll back to request 8 vars and add unit conversion --- city_metrix/layers/cams.py | 39 ++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index b1fbf08..24bc0af 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -7,36 +7,29 @@ class Cams(Layer): - def __init__(self, start_date="2023-01-01", end_date="2023-12-01", species, **kwargs): + def __init__(self, start_date="2023-01-01", end_date="2023-12-31", **kwargs): super().__init__(**kwargs) self.start_date = start_date self.end_date = end_date - if species in ("particulate_matter_2.5um", "particulate_matter_10um", "carbon_monoxide", "nitrogen_dioxide", "ozone", "sulphur_dioxide"): - self.species = species - else: - raise Exception("Selected species not supported") def get_data(self, bbox): min_lon, min_lat, max_lon, max_lat = bbox c = cdsapi.Client() - query_dict = { + c.retrieve( + 'cams-global-reanalysis-eac4', + { + 'variable': [ + "2m_temperature", "mean_sea_level_pressure", + "particulate_matter_2.5um", "particulate_matter_10um", + "carbon_monoxide", "nitrogen_dioxide", "ozone", "sulphur_dioxide" + ], "model_level": ["60"], "date": [f"{self.start_date}/{self.end_date}"], 'time': ['00:00', '03:00', '06:00', '09:00', '12:00', '15:00', '18:00', '21:00'], 'area': [max_lat, min_lon, min_lat, max_lon], 'data_format': 'netcdf_zip', - } - if self.species in ("particulate_matter_2.5um", "particulate_matter_10um"): - query_dict['variable'] = [self.species] - else: - query_dict['variable'] = [ - "2m_temperature", "mean_sea_level_pressure", self.species - ] - - c.retrieve( - 'cams-global-reanalysis-eac4', - query_dict, + }, 'cams_download.zip') # extract the ZIP file @@ -58,7 +51,6 @@ def get_data(self, bbox): else dataarray for dataarray in dataarray_list ] - # drop coordinate ['latitude','longitude'] if uses 360 degree system dataarray_list = [ dataarray.drop_vars(['latitude', 'longitude']) @@ -67,6 +59,17 @@ def get_data(self, bbox): for dataarray in dataarray_list ] data = xr.merge(dataarray_list) + + # unit conversion + # particulate matter: concentration * 10^9 + for var in ['pm2p5', 'pm10']: + data[var].values = data[var].values * (10 ** 9) + # other: concentration x pressure / (287.058 * temp) * 10^9 + for var in ['co', 'no2', 'go3', 'so2']: + data[var].values = data[var].values * data['msl'].values / (287.058 * data['t2m'].values) * (10 ** 9) + + # drop pressure and temperature + data = 
data.drop_vars(['msl', 't2m']) # xarray.Dataset to xarray.DataArray data = data.to_array() From 459cd43fb6a3f9cd639cee0f9678dc314c1bbd91 Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Tue, 29 Oct 2024 18:03:13 -0400 Subject: [PATCH 06/10] set consistent coords for all variables --- city_metrix/layers/cams.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index 24bc0af..5a27dd8 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -58,13 +58,16 @@ def get_data(self, bbox): else dataarray for dataarray in dataarray_list ] + dataarray_list[0] = dataarray_list[0].assign_coords(dataarray_list[1].coords) data = xr.merge(dataarray_list) # unit conversion # particulate matter: concentration * 10^9 + # target unit is ug/m3 for var in ['pm2p5', 'pm10']: data[var].values = data[var].values * (10 ** 9) # other: concentration x pressure / (287.058 * temp) * 10^9 + # target unit is ug/m3 for var in ['co', 'no2', 'go3', 'so2']: data[var].values = data[var].values * data['msl'].values / (287.058 * data['t2m'].values) * (10 ** 9) From e4777e9911b318f20c7504e7f2e868180b64dfd4 Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Wed, 30 Oct 2024 18:19:58 +0800 Subject: [PATCH 07/10] update coordinate assignment --- city_metrix/layers/cams.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index 5a27dd8..ff62e9e 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -51,14 +51,11 @@ def get_data(self, bbox): else dataarray for dataarray in dataarray_list ] - # drop coordinate ['latitude','longitude'] if uses 360 degree system + # assign coordinate with last dataarray to fix 1) use 360 degree system issue 2) slightly different lat lons dataarray_list = [ - dataarray.drop_vars(['latitude', 'longitude']) - if (dataarray['longitude'].values > 180).any() - else dataarray + dataarray.assign_coords(dataarray_list[-1].coords) for dataarray in dataarray_list ] - dataarray_list[0] = dataarray_list[0].assign_coords(dataarray_list[1].coords) data = xr.merge(dataarray_list) # unit conversion From 3f1825d01f3b6f901d879c5ec8484ce43ae1d867 Mon Sep 17 00:00:00 2001 From: Ted Wong Date: Wed, 30 Oct 2024 16:17:46 -0400 Subject: [PATCH 08/10] Return one-pixel array without latlon dims; workaround for remove-file error --- city_metrix/layers/cams.py | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index ff62e9e..08b8644 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -32,16 +32,23 @@ def get_data(self, bbox): }, 'cams_download.zip') + # If data files from earlier runs not deleted, save new files with postpended numbers + existing_cams_downloads = [fname for fname in os.listdir('.') if fname[:13] == 'cams_download' and fname[-3:] != 'zip'] + num_id = len(existing_cams_downloads) + while f'cams_download_{num_id}' in existing_cams_downloads: + num_id += 1 + fname = 'cams_download{0}'.format(['', '_{0}'.format(num_id)][int(num_id > 0)]) + os.makedirs(fname, exist_ok=True) + # extract the ZIP file - os.makedirs('cams_download', exist_ok=True) with zipfile.ZipFile('cams_download.zip', 'r') as zip_ref: # Extract all the contents of the ZIP file to the specified directory - zip_ref.extractall('cams_download') + zip_ref.extractall(fname) # load netcdf files dataarray_list = [] - for nc_file in os.listdir('cams_download'): - dataarray 
= xr.open_dataset(f'cams_download/{nc_file}') + for nc_file in os.listdir(fname): + dataarray = xr.open_dataset(f'{fname}/{nc_file}') dataarray_list.append(dataarray) # not all variables have 'model_level', concatenate without 'model_level' dimension @@ -75,8 +82,12 @@ def get_data(self, bbox): # Remove local files os.remove('cams_download.zip') - for nc_file in os.listdir('cams_download'): - os.remove(f'cams_download/{nc_file}') - os.rmdir('cams_download') + try: # Workaround for elusive permission error + for nc_file in os.listdir(fname): + os.remove(f'{fname}/{nc_file}') + os.rmdir(fname) + except: + pass - return data + centerlat, centerlon = (min_lat + max_lat) / 2, (min_lon + max_lon) / 2 + return data.sel(latitude=centerlat, longitude=centerlon, method="nearest") From 4689024441ab880c8f4b19d58a40eb9a30fa7915 Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Sun, 3 Nov 2024 16:36:49 +0800 Subject: [PATCH 09/10] solve conflict --- city_metrix/layers/cams.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index 08b8644..34c8892 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -33,13 +33,13 @@ def get_data(self, bbox): 'cams_download.zip') # If data files from earlier runs not deleted, save new files with postpended numbers - existing_cams_downloads = [fname for fname in os.listdir('.') if fname[:13] == 'cams_download' and fname[-3:] != 'zip'] + existing_cams_downloads = [fname for fname in os.listdir('.') if fname.startswith('cams_download') and not fname.endswith('.zip')] num_id = len(existing_cams_downloads) while f'cams_download_{num_id}' in existing_cams_downloads: num_id += 1 - fname = 'cams_download{0}'.format(['', '_{0}'.format(num_id)][int(num_id > 0)]) + fname = f'cams_download{"" if num_id == 0 else f"_{num_id}"}' os.makedirs(fname, exist_ok=True) - + # extract the ZIP file with zipfile.ZipFile('cams_download.zip', 'r') as zip_ref: # Extract all the contents of the ZIP file to the specified directory @@ -60,7 +60,7 @@ def get_data(self, bbox): ] # assign coordinate with last dataarray to fix 1) use 360 degree system issue 2) slightly different lat lons dataarray_list = [ - dataarray.assign_coords(dataarray_list[-1].coords) + dataarray.assign_coords(dataarray_list[-1].coords) for dataarray in dataarray_list ] data = xr.merge(dataarray_list) @@ -82,12 +82,17 @@ def get_data(self, bbox): # Remove local files os.remove('cams_download.zip') - try: # Workaround for elusive permission error + # Workaround for elusive permission error + try: for nc_file in os.listdir(fname): os.remove(f'{fname}/{nc_file}') os.rmdir(fname) except: pass - centerlat, centerlon = (min_lat + max_lat) / 2, (min_lon + max_lon) / 2 - return data.sel(latitude=centerlat, longitude=centerlon, method="nearest") + # Select the nearest data point based on latitude and longitude + center_lon = (min_lon + max_lon) / 2 + center_lat = (min_lat + max_lat) / 2 + data = data.sel(latitude=center_lat, longitude=center_lon, method="nearest") + + return data From 010724dc25c0cf2212e3122fdf960bff127a850d Mon Sep 17 00:00:00 2001 From: weiqi-tori Date: Sun, 3 Nov 2024 17:39:29 +0800 Subject: [PATCH 10/10] Use context manager to open and close nc file --- city_metrix/layers/cams.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/city_metrix/layers/cams.py b/city_metrix/layers/cams.py index 34c8892..729d37b 100644 --- a/city_metrix/layers/cams.py +++ b/city_metrix/layers/cams.py @@ -48,8 
+48,8 @@ def get_data(self, bbox): # load netcdf files dataarray_list = [] for nc_file in os.listdir(fname): - dataarray = xr.open_dataset(f'{fname}/{nc_file}') - dataarray_list.append(dataarray) + with xr.open_dataset(f'{fname}/{nc_file}') as dataarray: + dataarray_list.append(dataarray) # not all variables have 'model_level', concatenate without 'model_level' dimension dataarray_list = [
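
For reference, here is a minimal, hedged sketch of how the finished Cams layer could be exercised once the series above is applied, plus a small arithmetic check of the gas-species unit conversion introduced in PATCH 05 and documented in PATCH 06. It is an illustration only, not part of the series: it assumes a configured CDS API credential file, and the bounding-box coordinates and the pressure/temperature/mixing-ratio numbers below are made-up examples rather than values taken from the repository.

# Usage sketch for the Cams layer in its final (PATCH 10) form. Requires the
# city_metrix package from this series and valid CDS API credentials.
from city_metrix.layers import Cams

# (min_lon, min_lat, max_lon, max_lat) -- hypothetical coordinates for illustration
bbox = (-38.35, -12.99, -38.33, -12.97)

layer = Cams(start_date="2023-01-01", end_date="2023-12-31")
data = layer.get_data(bbox)  # 3-hourly pollutant series at the bbox centre, in ug/m3

# Arithmetic check of the gas-species conversion from PATCH 05/06: a mass mixing
# ratio (kg/kg) becomes a concentration in ug/m3 by multiplying with air density
# rho = p / (R_d * T), R_d = 287.058 J kg^-1 K^-1, then scaling by 1e9 (kg -> ug).
p, temp, mmr = 101325.0, 293.15, 5.0e-8  # Pa, K, kg/kg -- example values only
rho = p / (287.058 * temp)               # about 1.20 kg/m3
concentration = mmr * rho * 1e9          # about 60 ug/m3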