Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up functions that use time dimension #1713

Merged
merged 7 commits into from
Oct 5, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions esmvalcore/cmor/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def _get_next_month(month, year):

def _get_time_bounds(time, freq):
bounds = []
for step, point in enumerate(time.points):
month = time.cell(step).point.month
year = time.cell(step).point.year
dates = time.units.num2date(time.points)
for step, date in enumerate(dates):
month = date.month
year = date.year
if freq in ['mon', 'mo']:
next_month, next_year = _get_next_month(month, year)
min_bound = date2num(datetime(year, month, 1, 0, 0),
Expand All @@ -61,6 +62,7 @@ def _get_time_bounds(time, freq):
'3hr': 1.5 / 24,
'1hr': 0.5 / 24,
}
point = time.points[step]
min_bound = point - delta[freq]
max_bound = point + delta[freq]
bounds.append([min_bound, max_bound])
Expand Down Expand Up @@ -885,9 +887,10 @@ def _check_time_coord(self):
if freq.lower().endswith('pt'):
freq = freq[:-2]
if freq in ['mon', 'mo']:
dates = coord.units.num2date(coord.points)
for i in range(len(coord.points) - 1):
first = coord.cell(i).point
second = coord.cell(i + 1).point
first = dates[i]
second = dates[i + 1]
second_month = first.month + 1
second_year = first.year
if second_month == 13:
Expand All @@ -899,9 +902,10 @@ def _check_time_coord(self):
self.report_error(msg, var_name, freq)
break
elif freq == 'yr':
dates = coord.units.num2date(coord.points)
for i in range(len(coord.points) - 1):
first = coord.cell(i).point
second = coord.cell(i + 1).point
first = dates[i]
second = dates[i + 1]
second_month = first.month + 1
if first.year + 1 != second.year:
msg = '{}: Frequency {} does not match input data'
Expand Down
178 changes: 80 additions & 98 deletions esmvalcore/preprocessor/_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
filterwarnings(
'ignore',
"Collapsing a non-contiguous coordinate. "
"Metadata may not be fully descriptive for '{0}'.".format(_coord),
f"Metadata may not be fully descriptive for '{_coord}'.",
category=UserWarning,
module='iris',
)
Expand Down Expand Up @@ -78,36 +78,14 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month,
ValueError
if time ranges are outside the cube time limits
"""
time_coord = cube.coord('time')
time_units = time_coord.units
if time_units.calendar == '360_day':
if start_day > 30:
start_day = 30
if end_day > 30:
end_day = 30
t_1 = PartialDateTime(year=int(start_year),
month=int(start_month),
day=int(start_day))
t_2 = PartialDateTime(year=int(end_year),
month=int(end_month),
day=int(end_day))

constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)

cube_slice = cube.extract(constraint)
if cube_slice is None:
raise ValueError(
f"Time slice {start_year:0>4d}-{start_month:0>2d}-{start_day:0>2d}"
f" to {end_year:0>4d}-{end_month:0>2d}-{end_day:0>2d} is outside "
f"cube time bounds {time_coord.cell(0)} to {time_coord.cell(-1)}.")

# Issue when time dimension was removed when only one point as selected.
if cube_slice.ndim != cube.ndim:
if cube_slice.coord('time') == time_coord:
logger.debug('No change needed to time.')
return cube

return cube_slice
return _extract_datetime(cube, t_1, t_2)


def _parse_start_date(date):
Expand Down Expand Up @@ -157,22 +135,11 @@ def _duration_to_date(duration, reference, sign):
return date


def _restore_time_coord_position(cube, original_time_index):
"""Restore original ordering of coordinates."""
# Coordinates before time
new_order = list(np.arange(original_time_index) + 1)

# Time coordinate
new_order.append(0)

# Coordinates after time
new_order = new_order + list(range(original_time_index + 1, cube.ndim))

# Transpose cube in-place
cube.transpose(new_order)


def _extract_datetime(cube, start_datetime, end_datetime):
def _extract_datetime(
cube: iris.cube.Cube,
sloosvel marked this conversation as resolved.
Show resolved Hide resolved
start_datetime: PartialDateTime,
end_datetime: PartialDateTime,
) -> iris.cube.Cube:
"""Extract a time range from a cube.

Given a time range passed in as a datetime.datetime object, it
Expand All @@ -183,9 +150,9 @@ def _extract_datetime(cube, start_datetime, end_datetime):
----------
cube: iris.cube.Cube
input cube.
start_datetime: datetime.datetime
start_datetime: PartialDateTime
start datetime
end_datetime: datetime.datetime
end_datetime: PartialDateTime
end datetime

Returns
Expand All @@ -201,44 +168,36 @@ def _extract_datetime(cube, start_datetime, end_datetime):
time_coord = cube.coord('time')
time_units = time_coord.units
if time_units.calendar == '360_day':
if start_datetime.day > 30:
start_datetime = start_datetime.replace(day=30)
if end_datetime.day > 30:
end_datetime = end_datetime.replace(day=30)

t_1 = PartialDateTime(year=int(start_datetime.year),
month=int(start_datetime.month),
day=int(start_datetime.day),
hour=int(start_datetime.hour),
minute=int(start_datetime.minute),
second=int(start_datetime.second))

t_2 = PartialDateTime(year=int(end_datetime.year),
month=int(end_datetime.month),
day=int(end_datetime.day),
hour=int(end_datetime.hour),
minute=int(end_datetime.minute),
second=int(end_datetime.second))

constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)
cube_slice = cube.extract(constraint)
if isinstance(start_datetime.day, int) and start_datetime.day > 30:
start_datetime.day = 30
if isinstance(end_datetime.day, int) and end_datetime.day > 30:
end_datetime.day = 30

if not cube.coord_dims(time_coord):
constraint = iris.Constraint(
time=lambda t: start_datetime <= t.point < end_datetime)
cube_slice = cube.extract(constraint)
else:
# Convert all time points to dates at once, this is much faster
# than using a constraint.
dates = time_coord.units.num2date(time_coord.points)
select = (dates >= start_datetime) & (dates < end_datetime)
if select.any():
cube_slice = cube.subset(time_coord[select])
else:
cube_slice = None

if cube_slice is None:
def dt2str(time: PartialDateTime) -> str:
txt = f"{time.year}-{time.month:02d}-{time.day:02d}"
if any([time.hour, time.minute, time.second]):
txt += f" {time.hour:02d}:{time.minute:02d}:{time.second:02d}"
return txt
raise ValueError(
f"Time slice {start_datetime.strftime('%Y-%m-%d')} "
f"to {end_datetime.strftime('%Y-%m-%d')} is outside "
f"Time slice {dt2str(start_datetime)} "
f"to {dt2str(end_datetime)} is outside "
f"cube time bounds {time_coord.cell(0)} to {time_coord.cell(-1)}.")
schlunma marked this conversation as resolved.
Show resolved Hide resolved

# If only a single point in time is extracted, the new time coordinate of
# cube_slice is a scalar coordinate. Convert this back to a regular
# dimensional coordinate with length 1. Note that iris.util.new_axis always
# puts the new axis at index 0, so we need to reorder the coordinates in
# case the original time coordinate was not at index 0.
if cube_slice.ndim < cube.ndim:
cube_slice = iris.util.new_axis(cube_slice, 'time')
original_time_index = cube.coord_dims(time_coord)[0]
if original_time_index != 0:
_restore_time_coord_position(cube_slice, original_time_index)

return cube_slice


Expand Down Expand Up @@ -280,7 +239,25 @@ def clip_timerange(cube, timerange):
end_date = _duration_to_date(end_date, start_date, sign=1)
end_date += datetime.timedelta(seconds=1)

return _extract_datetime(cube, start_date, end_date)
t_1 = PartialDateTime(
year=start_date.year,
month=start_date.month,
day=start_date.day,
hour=start_date.hour,
minute=start_date.minute,
second=start_date.second,
)

t_2 = PartialDateTime(
year=end_date.year,
month=end_date.month,
day=end_date.day,
hour=end_date.hour,
minute=end_date.minute,
second=end_date.second,
)

return _extract_datetime(cube, t_1, t_2)


def extract_season(cube, season):
Expand Down Expand Up @@ -566,9 +543,9 @@ def seasonal_statistics(cube,
iris.cube.Cube
Seasonal statistic cube
"""
seasons = tuple([sea.upper() for sea in seasons])
seasons = tuple(sea.upper() for sea in seasons)

if any([len(sea) < 2 for sea in seasons]):
if any(len(sea) < 2 for sea in seasons):
raise ValueError(
f"Minimum of 2 month is required per Seasons: {seasons}.")

Expand All @@ -579,7 +556,7 @@ def seasonal_statistics(cube,
seasons=seasons)
else:
old_seasons = list(set(cube.coord('clim_season').points))
if not all([osea in seasons for osea in old_seasons]):
if not all(osea in seasons for osea in old_seasons):
raise ValueError(
f"Seasons {seasons} do not match prior season extraction "
f"{old_seasons}.")
Expand Down Expand Up @@ -911,7 +888,8 @@ def regrid_time(cube, frequency):
cube with converted time axis and units.
"""
# standardize time points
time_c = [cell.point for cell in cube.coord('time').cells()]
coord = cube.coord('time')
time_c = coord.units.num2date(coord.points)
if frequency == 'yr':
time_cells = [datetime.datetime(t.year, 7, 1, 0, 0, 0) for t in time_c]
elif frequency == 'mon':
Expand Down Expand Up @@ -1057,9 +1035,9 @@ def timeseries_filter(cube,
if filter_type == 'lowpass':
wgts = low_pass_weights(window, 1. / span)
else:
raise NotImplementedError("Filter type {} not implemented, \
please choose one of {}".format(filter_type,
", ".join(supported_filters)))
raise NotImplementedError(
f"Filter type {filter_type} not implemented, "
f"please choose one of {', '.join(supported_filters)}")

# Apply filter
aggregation_operator = get_iris_analysis_operation(filter_stats)
Expand Down Expand Up @@ -1119,9 +1097,16 @@ def resample_hours(cube, interval, offset=0):
if cube_period.total_seconds() / 3600 >= interval:
raise ValueError(f"Data period ({cube_period}) should be lower than "
f"the interval ({interval})")
hours = range(0 + offset, 24, interval)
select_hours = iris.Constraint(time=lambda cell: cell.point.hour in hours)
return cube.extract(select_hours)
hours = [PartialDateTime(hour=h) for h in range(0 + offset, 24, interval)]
dates = time.units.num2date(time.points)
select = np.zeros(len(dates), dtype=bool)
for hour in hours:
select |= dates == hour
if not select.any():
raise ValueError(
f"Time coordinate {dates} does not contain {hours} for {cube}")

return cube.subset(time[select])


def resample_time(cube, month=None, day=None, hour=None):
Expand Down Expand Up @@ -1162,14 +1147,11 @@ def resample_time(cube, month=None, day=None, hour=None):
iris.cube.Cube
Cube with the new frequency.
"""
def compare(cell):
date = cell.point
if month is not None and month != date.month:
return False
if day is not None and day != date.day:
return False
if hour is not None and hour != date.hour:
return False
return True

return cube.extract(iris.Constraint(time=compare))
time = cube.coord('time')
dates = time.units.num2date(time.points)
requested = PartialDateTime(month=month, day=day, hour=hour)
select = dates == requested
if not select.any():
raise ValueError(
f"Time coordinate {dates} does not contain {requested} for {cube}")
return cube.subset(time[select])
Loading