Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TCTracks: Option to read additional variables from IBTrACS (e.g. "nature"/stormtype) #728

Merged
merged 8 commits into from
May 31, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Removed:
- `Exposures.affected_total_value` now takes a hazard intensity threshold as argument. Affected values are only those for which at least one event exceeds the threshold. (previously, all exposures points with an assigned centroid were considered affected). By default the centroids are reassigned. [#702](https://github.com/CLIMADA-project/climada_python/pull/702) [#730](https://github.com/CLIMADA-project/climada_python/pull/730)
- Add option to pass region ID to `LitPop.from_shape` [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
- Slightly improved performance on `LitPop`-internal computations [#720](https://github.com/CLIMADA-project/climada_python/pull/720)
- Add option to read additional variables from IBTrACS when using `TCTracks.from_ibtracs_netcdf` [#728](https://github.com/CLIMADA-project/climada_python/pull/728)

### Fixed

Expand Down
54 changes: 39 additions & 15 deletions climada/hazard/tc_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ class TCTracks():
Computed during processing:
- on_land (bool for each track position)
- dist_since_lf (in km)
Additional data variables such as "nature" (specifiying, for each track position, whether a
system is a disturbance, tropical storm, post-transition extratropical storm etc.) might be
included, depending on the data source and on use cases.
"""
def __init__(self,
data: Optional[List[xr.Dataset]] = None,
Expand Down Expand Up @@ -328,7 +331,7 @@ def read_ibtracs_netcdf(self, *args, **kwargs):
def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=None,
year_range=None, basin=None, genesis_basin=None,
interpolate_missing=True, estimate_missing=False, correct_pres=False,
discard_single_points=True,
discard_single_points=True, additional_variables=None,
file_name='IBTrACS.ALL.v04r00.nc'):
"""Create new TCTracks object from IBTrACS databse.

Expand Down Expand Up @@ -438,6 +441,10 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
file_name : str, optional
Name of NetCDF file to be dowloaded or located at climada/data/system.
Default: 'IBTrACS.ALL.v04r00.nc'
additional_variables : list of str, optional
If specified, additional IBTrACS data variables are extracted, such as "nature" or
"storm_speed". Only variables that are not agency-specific are supported.
Default: None.

Returns
-------
Expand All @@ -462,6 +469,9 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
f'Error while downloading {IBTRACS_URL}. Try to download it manually and '
f'put the file in {ibtracs_path}') from err

if additional_variables is None:
additional_variables = []

ibtracs_ds = xr.open_dataset(ibtracs_path)
ibtracs_date = ibtracs_ds.attrs["date_created"]
if (np.datetime64('today') - np.datetime64(ibtracs_date)).item().days > 180:
Expand Down Expand Up @@ -576,7 +586,8 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
ibtracs_ds[tc_var].sel(storm=nonsingular_mask).interpolate_na(
dim="date_time", method="linear"))
ibtracs_ds = ibtracs_ds[['sid', 'name', 'basin', 'time', 'valid_t']
+ phys_vars + [f'{v}_agency' for v in phys_vars]]
+ additional_variables + phys_vars
+ [f'{v}_agency' for v in phys_vars]]

if estimate_missing:
ibtracs_ds['pres'][:] = _estimate_pressure(
Expand Down Expand Up @@ -685,28 +696,36 @@ def from_ibtracs_netcdf(cls, provider=None, rescale_windspeeds=True, storm_id=No
"{}({})".format(v, track_ds[f'{v}_agency'].astype(str).item())
for v in phys_vars)

all_tracks.append(xr.Dataset({
'time_step': ('time', track_ds.time_step.data),
data_vars = {
'radius_max_wind': ('time', track_ds.rmw.data),
'radius_oci': ('time', track_ds.roci.data),
'max_sustained_wind': ('time', track_ds.wind.data),
'central_pressure': ('time', track_ds.pres.data),
'environmental_pressure': ('time', track_ds.poci.data),
'basin': ('time', track_ds.basin.data.astype("<U2")),
}, coords={
'time': track_ds.time.dt.round('s').data,
}
coords = {
'time': ('time', track_ds.time.dt.round('s').data),
'lat': ('time', track_ds.lat.data),
'lon': ('time', track_ds.lon.data),
}, attrs={
}
attrs = {
'max_sustained_wind_unit': 'kn',
'central_pressure_unit': 'mb',
'name': track_ds.name.astype(str).item(),
'sid': track_ds.sid.astype(str).item(),
'orig_event_flag': True,
'data_provider': provider_str,
'id_no': track_ds.id_no.item(),
'category': category[i_track],
}))
}
for varname in ["time_step", "basin", "name", "sid", "id_no"] + additional_variables:
values = track_ds[varname].data
dtype_kind = track_ds[varname].dtype.kind
if dtype_kind == "S":
strlen = track_ds[varname].str.len().max().item()
values = values.astype(f"<U{strlen}")
if values.ndim == 0:
attrs[varname] = (values.astype(str) if dtype_kind == "S" else values).item()
else:
data_vars[varname] = ('time', values)
all_tracks.append(xr.Dataset(data_vars, coords=coords, attrs=attrs))
peanutfun marked this conversation as resolved.
Show resolved Hide resolved
if last_perc != 100:
LOGGER.info("Progress: 100%")
if len(all_tracks) == 0:
Expand Down Expand Up @@ -1382,8 +1401,11 @@ def from_hdf5(cls, file_name):
for i in track_no:
track = ds_dict[f'track{i}']
track.attrs['orig_event_flag'] = bool(track.attrs['orig_event_flag'])
# when writing '<U2' and reading in again, xarray reads as dtype 'object'. undo this:
track['basin'] = track['basin'].astype('<U2')
# when writing '<U*' and reading in again, xarray reads as dtype 'object'. undo this:
for varname in track.data_vars:
if track[varname].dtype == "object":
strlen = track[varname].str.len().max().item()
track[varname] = track[varname].astype(f"<U{strlen}")
data.append(track)
return cls(data)

Expand Down Expand Up @@ -1499,7 +1521,9 @@ def _one_interp_data(track, time_step_h, land_geom=None):
time_step = pd.tseries.frequencies.to_offset(pd.Timedelta(hours=time_step_h)).freqstr
track_int = track.resample(time=time_step, skipna=True)\
.interpolate('linear')
track_int['basin'] = track.basin.resample(time=time_step).nearest()
for var in track.data_vars:
if "time" in track[var].dims and track[var].dtype.kind != "f":
track_int[var] = track[var].resample(time=time_step).nearest()
track_int['time_step'][:] = time_step_h
lon_int = lon.resample(time=time_step).interpolate(method)
lon_int[lon_int > 180] -= 360
Expand Down
28 changes: 27 additions & 1 deletion climada/hazard/test/test_tc_tracks.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,32 @@ def test_ibtracs_discard_single_points(self):
break
self.assertTrue(passed)

def test_ibtracs_additional_variables(self):
"""Check additional_variables option"""
tc_track = tc.TCTracks.from_ibtracs_netcdf(
storm_id='2017242N16333',
additional_variables=[
peanutfun marked this conversation as resolved.
Show resolved Hide resolved
'numobs', 'season', 'number', 'subbasin', 'name', 'source_usa', 'source_jma',
'source_cma', 'source_hko', 'source_new', 'source_reu', 'source_bom', 'source_nad',
'source_wel', 'source_td5', 'source_td6', 'source_ds8', 'source_neu', 'source_mlc',
'iso_time', 'nature', 'wmo_wind', 'wmo_pres', 'wmo_agency', 'track_type',
'main_track_sid', 'dist2land', 'landfall', 'iflag', 'storm_speed', 'storm_dir',
],
)
track_ds = tc_track.get_track()
self.assertIn("nature", track_ds.data_vars)
self.assertEqual(track_ds.attrs["numobs"], 123)
self.assertEqual(track_ds.attrs["season"], 2017)
self.assertEqual(track_ds["nature"].values[0], "TS")
self.assertEqual(track_ds["nature"].values[-1], "DS")
self.assertEqual(track_ds["subbasin"].values[0], "NA")
self.assertEqual(track_ds["subbasin"].values[58], "CS")
self.assertEqual(track_ds["dist2land"].values[0], 1020)
self.assertEqual(track_ds["dist2land"].values[-1], 0)
self.assertEqual(track_ds["storm_speed"].values[0], 13.0)
self.assertEqual(track_ds["storm_speed"].values[5], 11.0)
self.assertEqual(track_ds["storm_speed"].values[-1], 8.0)

class TestIO(unittest.TestCase):
"""Test reading of tracks from files of different formats"""
def test_netcdf_io(self):
Expand All @@ -281,7 +307,7 @@ def test_read_legacy_netcdf(self):
np.testing.assert_array_equal(tr.basin, "SP")

def test_hdf5_io(self):
"""Test writting and reading hdf5 TCTracks instances"""
"""Test writing and reading hdf5 TCTracks instances"""
path = DATA_DIR.joinpath("tc_tracks.h5")
tc_track = tc.TCTracks.from_ibtracs_netcdf(
provider='usa', year_range=(1993, 1994), basin='EP', estimate_missing=True)
Expand Down