Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up functions that use time dimension #1713

Merged
merged 7 commits into from
Oct 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions esmvalcore/cmor/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,10 @@ def _get_next_month(month, year):

def _get_time_bounds(time, freq):
bounds = []
for step, point in enumerate(time.points):
month = time.cell(step).point.month
year = time.cell(step).point.year
dates = time.units.num2date(time.points)
for step, date in enumerate(dates):
month = date.month
year = date.year
if freq in ['mon', 'mo']:
next_month, next_year = _get_next_month(month, year)
min_bound = date2num(datetime(year, month, 1, 0, 0),
Expand All @@ -61,6 +62,7 @@ def _get_time_bounds(time, freq):
'3hr': 1.5 / 24,
'1hr': 0.5 / 24,
}
point = time.points[step]
min_bound = point - delta[freq]
max_bound = point + delta[freq]
bounds.append([min_bound, max_bound])
Expand Down Expand Up @@ -885,9 +887,10 @@ def _check_time_coord(self):
if freq.lower().endswith('pt'):
freq = freq[:-2]
if freq in ['mon', 'mo']:
dates = coord.units.num2date(coord.points)
for i in range(len(coord.points) - 1):
first = coord.cell(i).point
second = coord.cell(i + 1).point
first = dates[i]
second = dates[i + 1]
second_month = first.month + 1
second_year = first.year
if second_month == 13:
Expand All @@ -899,9 +902,10 @@ def _check_time_coord(self):
self.report_error(msg, var_name, freq)
break
elif freq == 'yr':
dates = coord.units.num2date(coord.points)
for i in range(len(coord.points) - 1):
first = coord.cell(i).point
second = coord.cell(i + 1).point
first = dates[i]
second = dates[i + 1]
second_month = first.month + 1
if first.year + 1 != second.year:
msg = '{}: Frequency {} does not match input data'
Expand Down
195 changes: 97 additions & 98 deletions esmvalcore/preprocessor/_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
filterwarnings(
'ignore',
"Collapsing a non-contiguous coordinate. "
"Metadata may not be fully descriptive for '{0}'.".format(_coord),
f"Metadata may not be fully descriptive for '{_coord}'.",
category=UserWarning,
module='iris',
)
Expand Down Expand Up @@ -78,36 +78,14 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month,
ValueError
if time ranges are outside the cube time limits
"""
time_coord = cube.coord('time')
time_units = time_coord.units
if time_units.calendar == '360_day':
if start_day > 30:
start_day = 30
if end_day > 30:
end_day = 30
t_1 = PartialDateTime(year=int(start_year),
month=int(start_month),
day=int(start_day))
t_2 = PartialDateTime(year=int(end_year),
month=int(end_month),
day=int(end_day))

constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)

cube_slice = cube.extract(constraint)
if cube_slice is None:
raise ValueError(
f"Time slice {start_year:0>4d}-{start_month:0>2d}-{start_day:0>2d}"
f" to {end_year:0>4d}-{end_month:0>2d}-{end_day:0>2d} is outside "
f"cube time bounds {time_coord.cell(0)} to {time_coord.cell(-1)}.")

# Issue when time dimension was removed when only one point as selected.
if cube_slice.ndim != cube.ndim:
if cube_slice.coord('time') == time_coord:
logger.debug('No change needed to time.')
return cube

return cube_slice
return _extract_datetime(cube, t_1, t_2)


def _parse_start_date(date):
Expand Down Expand Up @@ -157,22 +135,27 @@ def _duration_to_date(duration, reference, sign):
return date


def _restore_time_coord_position(cube, original_time_index):
"""Restore original ordering of coordinates."""
# Coordinates before time
new_order = list(np.arange(original_time_index) + 1)

# Time coordinate
new_order.append(0)

# Coordinates after time
new_order = new_order + list(range(original_time_index + 1, cube.ndim))

# Transpose cube in-place
cube.transpose(new_order)
def _select_timeslice(
cube: iris.cube.Cube,
select: np.ndarray,
) -> iris.cube.Cube | None:
"""Slice a cube along its time axis."""
if select.any():
coord = cube.coord('time')
time_dim = cube.coord_dims(coord)[0]
slices = tuple(select if i == time_dim else slice(None)
for i in range(cube.ndim))
cube_slice = cube[slices]
else:
cube_slice = None
return cube_slice


def _extract_datetime(cube, start_datetime, end_datetime):
def _extract_datetime(
cube: iris.cube.Cube,
sloosvel marked this conversation as resolved.
Show resolved Hide resolved
start_datetime: PartialDateTime,
end_datetime: PartialDateTime,
) -> iris.cube.Cube:
"""Extract a time range from a cube.

Given a time range passed in as a datetime.datetime object, it
Expand All @@ -183,9 +166,9 @@ def _extract_datetime(cube, start_datetime, end_datetime):
----------
cube: iris.cube.Cube
input cube.
start_datetime: datetime.datetime
start_datetime: PartialDateTime
start datetime
end_datetime: datetime.datetime
end_datetime: PartialDateTime
end datetime

Returns
Expand All @@ -201,43 +184,34 @@ def _extract_datetime(cube, start_datetime, end_datetime):
time_coord = cube.coord('time')
time_units = time_coord.units
if time_units.calendar == '360_day':
if start_datetime.day > 30:
start_datetime = start_datetime.replace(day=30)
if end_datetime.day > 30:
end_datetime = end_datetime.replace(day=30)

t_1 = PartialDateTime(year=int(start_datetime.year),
month=int(start_datetime.month),
day=int(start_datetime.day),
hour=int(start_datetime.hour),
minute=int(start_datetime.minute),
second=int(start_datetime.second))

t_2 = PartialDateTime(year=int(end_datetime.year),
month=int(end_datetime.month),
day=int(end_datetime.day),
hour=int(end_datetime.hour),
minute=int(end_datetime.minute),
second=int(end_datetime.second))

constraint = iris.Constraint(time=lambda t: t_1 <= t.point < t_2)
cube_slice = cube.extract(constraint)
if isinstance(start_datetime.day, int) and start_datetime.day > 30:
start_datetime.day = 30
if isinstance(end_datetime.day, int) and end_datetime.day > 30:
end_datetime.day = 30

if not cube.coord_dims(time_coord):
constraint = iris.Constraint(
time=lambda t: start_datetime <= t.point < end_datetime)
cube_slice = cube.extract(constraint)
else:
# Convert all time points to dates at once, this is much faster
# than using a constraint.
dates = time_coord.units.num2date(time_coord.points)
select = (dates >= start_datetime) & (dates < end_datetime)
cube_slice = _select_timeslice(cube, select)

if cube_slice is None:

def dt2str(time: PartialDateTime) -> str:
txt = f"{time.year}-{time.month:02d}-{time.day:02d}"
if any([time.hour, time.minute, time.second]):
txt += f" {time.hour:02d}:{time.minute:02d}:{time.second:02d}"
return txt
raise ValueError(
f"Time slice {start_datetime.strftime('%Y-%m-%d')} "
f"to {end_datetime.strftime('%Y-%m-%d')} is outside "
f"cube time bounds {time_coord.cell(0)} to {time_coord.cell(-1)}.")

# If only a single point in time is extracted, the new time coordinate of
# cube_slice is a scalar coordinate. Convert this back to a regular
# dimensional coordinate with length 1. Note that iris.util.new_axis always
# puts the new axis at index 0, so we need to reorder the coordinates in
# case the original time coordinate was not at index 0.
if cube_slice.ndim < cube.ndim:
cube_slice = iris.util.new_axis(cube_slice, 'time')
original_time_index = cube.coord_dims(time_coord)[0]
if original_time_index != 0:
_restore_time_coord_position(cube_slice, original_time_index)
f"Time slice {dt2str(start_datetime)} "
f"to {dt2str(end_datetime)} is outside "
f"cube time bounds {time_coord.cell(0).point} to "
f"{time_coord.cell(-1).point}.")

return cube_slice

Expand Down Expand Up @@ -280,7 +254,25 @@ def clip_timerange(cube, timerange):
end_date = _duration_to_date(end_date, start_date, sign=1)
end_date += datetime.timedelta(seconds=1)

return _extract_datetime(cube, start_date, end_date)
t_1 = PartialDateTime(
year=start_date.year,
month=start_date.month,
day=start_date.day,
hour=start_date.hour,
minute=start_date.minute,
second=start_date.second,
)

t_2 = PartialDateTime(
year=end_date.year,
month=end_date.month,
day=end_date.day,
hour=end_date.hour,
minute=end_date.minute,
second=end_date.second,
)

return _extract_datetime(cube, t_1, t_2)


def extract_season(cube, season):
Expand Down Expand Up @@ -566,9 +558,9 @@ def seasonal_statistics(cube,
iris.cube.Cube
Seasonal statistic cube
"""
seasons = tuple([sea.upper() for sea in seasons])
seasons = tuple(sea.upper() for sea in seasons)

if any([len(sea) < 2 for sea in seasons]):
if any(len(sea) < 2 for sea in seasons):
raise ValueError(
f"Minimum of 2 month is required per Seasons: {seasons}.")

Expand All @@ -578,8 +570,8 @@ def seasonal_statistics(cube,
name='clim_season',
seasons=seasons)
else:
old_seasons = list(set(cube.coord('clim_season').points))
if not all([osea in seasons for osea in old_seasons]):
old_seasons = sorted(set(cube.coord('clim_season').points))
if not all(osea in seasons for osea in old_seasons):
raise ValueError(
f"Seasons {seasons} do not match prior season extraction "
f"{old_seasons}.")
Expand Down Expand Up @@ -911,7 +903,8 @@ def regrid_time(cube, frequency):
cube with converted time axis and units.
"""
# standardize time points
time_c = [cell.point for cell in cube.coord('time').cells()]
coord = cube.coord('time')
time_c = coord.units.num2date(coord.points)
if frequency == 'yr':
time_cells = [datetime.datetime(t.year, 7, 1, 0, 0, 0) for t in time_c]
elif frequency == 'mon':
Expand Down Expand Up @@ -1057,9 +1050,9 @@ def timeseries_filter(cube,
if filter_type == 'lowpass':
wgts = low_pass_weights(window, 1. / span)
else:
raise NotImplementedError("Filter type {} not implemented, \
please choose one of {}".format(filter_type,
", ".join(supported_filters)))
raise NotImplementedError(
f"Filter type {filter_type} not implemented, "
f"please choose one of {', '.join(supported_filters)}")

# Apply filter
aggregation_operator = get_iris_analysis_operation(filter_stats)
Expand Down Expand Up @@ -1119,9 +1112,17 @@ def resample_hours(cube, interval, offset=0):
if cube_period.total_seconds() / 3600 >= interval:
raise ValueError(f"Data period ({cube_period}) should be lower than "
f"the interval ({interval})")
hours = range(0 + offset, 24, interval)
select_hours = iris.Constraint(time=lambda cell: cell.point.hour in hours)
return cube.extract(select_hours)
hours = [PartialDateTime(hour=h) for h in range(0 + offset, 24, interval)]
dates = time.units.num2date(time.points)
select = np.zeros(len(dates), dtype=bool)
for hour in hours:
select |= dates == hour
cube = _select_timeslice(cube, select)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think select_timeslice fails if time is a scalar dimension due to this line:

time_dim = cube.coord_dims(coord)[0]

Do we need to consider this here? In _extract_datetime you considered this case explicitly.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can make it a bit nicer still, but using this function on a scalar cube really doesn't make sense: at best it will return the original cube.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not saying we need to support this, but it would probably be nice to check if the coordinate is scalar and return a nice error message. Otherwise the line time_dim = cube.coord_dims(coord)[0] will probably produce an ugly IndexError.

if cube is None:
raise ValueError(
f"Time coordinate {dates} does not contain {hours} for {cube}")

return cube


def resample_time(cube, month=None, day=None, hour=None):
Expand Down Expand Up @@ -1162,14 +1163,12 @@ def resample_time(cube, month=None, day=None, hour=None):
iris.cube.Cube
Cube with the new frequency.
"""
def compare(cell):
date = cell.point
if month is not None and month != date.month:
return False
if day is not None and day != date.day:
return False
if hour is not None and hour != date.hour:
return False
return True

return cube.extract(iris.Constraint(time=compare))
time = cube.coord('time')
dates = time.units.num2date(time.points)
requested = PartialDateTime(month=month, day=day, hour=hour)
select = dates == requested
cube = _select_timeslice(cube, select)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same comment about the scalar time coordinate.

if cube is None:
raise ValueError(
f"Time coordinate {dates} does not contain {requested} for {cube}")
return cube
Loading