Skip to content

Commit

Permalink
fix: resample method for TimeSeriesBoolean (#349)
Browse files Browse the repository at this point in the history
If a period between two timesteps in the resampled boolean variable corresponds to
several periods in the original boolean variable, the resampling method should
return False if any of the respective periods in the original boolean variable are False.
  • Loading branch information
olelod authored Jan 19, 2024
1 parent 5c1c98c commit 8feaf6c
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 17 deletions.
7 changes: 5 additions & 2 deletions src/libecalc/common/time_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import enum
from dataclasses import dataclass
from datetime import date, datetime
from datetime import date, datetime, timedelta
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
Expand Down Expand Up @@ -211,7 +211,10 @@ def create_time_steps(
the requested frequency
"""
date_range = pd.date_range(start=start, end=end, freq=frequency.value)
# If the start date or end date is part of the date_range made by the frequency, the returned date range will
# always include the start and end date (no matter what the include_start_date and include_end_date booleans are).
# To avoid this add one day to start and subtract one day from end.
date_range = pd.date_range(start=start + timedelta(days=1), end=end - timedelta(days=1), freq=frequency.value)

time_steps = [clear_time(time_step) for time_step in date_range]
if include_start_date:
Expand Down
25 changes: 18 additions & 7 deletions src/libecalc/common/utils/rates.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,9 @@ def resample(self, freq: Frequency, include_start_date: bool = True, include_end
class TimeSeriesBoolean(TimeSeries[bool]):
def resample(self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True) -> Self:
"""
Resample using forward-fill. This means that a value is assumed to be the same until the next observation,
e.g. covering the whole period interval.
If a period between two time steps in the return time vector contains more than one time step in the
original vector, check if any of the relevant values in the original time vector is False. Then the
resampled value for that time step will be False.
Args:
freq: The frequency the time series should be resampled to
Expand All @@ -441,16 +442,26 @@ def resample(self, freq: Frequency, include_start_date: bool = True, include_end
if freq is Frequency.NONE:
return self.copy()

ds = pd.Series(index=self.timesteps, data=self.values)

# Always make new time series WITH end date, but remove it later if not needed
new_timeseries = resample_time_steps(
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=include_end_date
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=True
)
ds_resampled = ds.reindex(new_timeseries).ffill()
resampled = []

# Iterate over all pairs of subsequent dates in the new time vector
for start_period, end_period in zip(new_timeseries[:-1], new_timeseries[1:]):
start_index = self.timesteps.index(max([date for date in self.timesteps if date <= start_period]))
end_index = self.timesteps.index(max([date for date in self.timesteps if date < end_period]))
resampled.append(all(self.values[start_index : end_index + 1]))

if include_end_date:
resampled.append(self.values[-1])
else:
new_timeseries.pop()

return TimeSeriesBoolean(
timesteps=new_timeseries,
values=[bool(x) for x in ds_resampled.values.tolist()],
values=resampled,
unit=self.unit,
)

Expand Down
26 changes: 18 additions & 8 deletions src/tests/libecalc/common/utils/test_rates.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,16 @@ class TestBooleanTimeSeries:
@pytest.fixture
def boolean_series(self):
return TimeSeriesBoolean(
values=[False, True, True, False, True],
values=[False, True, True, False, True, True, False, True],
timesteps=[
datetime(2019, 7, 1),
datetime(2020, 1, 1),
datetime(2020, 7, 1),
datetime(2021, 1, 1),
datetime(2021, 7, 1),
datetime(2022, 1, 1),
datetime(2022, 7, 1),
datetime(2023, 1, 1),
],
unit=Unit.NONE,
)
Expand All @@ -92,36 +95,43 @@ def two_first_timesteps(self, boolean_series):
def test_resample_boolean(self, boolean_series):
# resample including start and end date
yearly_values = boolean_series.resample(freq=Frequency.YEAR)
assert yearly_values.values == [False, True, False, True]
assert yearly_values.values == [False, True, False, False, True]
assert yearly_values.timesteps == [
datetime(2019, 7, 1),
datetime(2020, 1, 1),
datetime(2021, 1, 1),
datetime(2021, 7, 1),
datetime(2022, 1, 1),
datetime(2023, 1, 1),
]

# resample including start and without end date
yearly_values = boolean_series.resample(freq=Frequency.YEAR, include_end_date=False)
assert yearly_values.values == [False, True, False]
assert yearly_values.values == [False, True, False, False]
assert yearly_values.timesteps == [
datetime(2019, 7, 1),
datetime(2020, 1, 1),
datetime(2021, 1, 1),
datetime(2022, 1, 1),
]

# resample without start and including end date
yearly_values = boolean_series.resample(freq=Frequency.YEAR, include_start_date=False)
assert yearly_values.values == [True, False, True]
assert yearly_values.values == [True, False, False, True]
assert yearly_values.timesteps == [
datetime(2020, 1, 1),
datetime(2021, 1, 1),
datetime(2021, 7, 1),
datetime(2022, 1, 1),
datetime(2023, 1, 1),
]

# resample without start and end date
yearly_values = boolean_series.resample(freq=Frequency.YEAR, include_start_date=False, include_end_date=False)
assert yearly_values.values == [True, False]
assert yearly_values.timesteps == [datetime(2020, 1, 1), datetime(2021, 1, 1)]
assert yearly_values.values == [True, False, False]
assert yearly_values.timesteps == [
datetime(2020, 1, 1),
datetime(2021, 1, 1),
datetime(2022, 1, 1),
]

def test_indexing(self, boolean_series):
first_timestep = TimeSeriesBoolean(
Expand Down

0 comments on commit 8feaf6c

Please sign in to comment.