Fix append FutureWarning (#764)

Co-authored-by: Martin Gabdushev <[email protected]>
alex-hse-repository and martins0n authored Jun 23, 2022
1 parent 2b86768 commit e3445c9
Showing 8 changed files with 27 additions and 21 deletions.
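
Throughout the commit the deprecated `DataFrame.append` calls are swapped for `pandas.concat`, which takes an iterable of frames and supports the same `ignore_index` flag. A minimal, hypothetical sketch of the pattern (not code from the repository); pandas 1.4 is where `frame.append` started emitting the FutureWarning:

```python
import pandas as pd

df = pd.DataFrame({"target": [1.0, 2.0]})
extra = pd.DataFrame({"target": [3.0]})

# Deprecated since pandas 1.4, emits:
# "FutureWarning: The frame.append method is deprecated ... Use pandas.concat instead."
# result = df.append(extra, ignore_index=True)

# Replacement pattern used throughout this commit:
result = pd.concat([df, extra], ignore_index=True)
print(result)
```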
2 changes: 1 addition & 1 deletion CHANGELOG.md
@@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
-
- Fix missing prophet in docker images ([#767](https://github.com/tinkoff-ai/etna/pull/767))
- Add `known_future` parameter to CLI ([#758](https://github.com/tinkoff-ai/etna/pull/758))
-
- FutureWarning: The frame.append method is deprecated. Use pandas.concat instead ([#764](https://github.com/tinkoff-ai/etna/pull/764))
-
-
-
2 changes: 1 addition & 1 deletion etna/analysis/eda_utils.py
@@ -743,7 +743,7 @@ def _prepare_seasonal_plot_df(
elif SeasonalPlotAlignment(alignment) == SeasonalPlotAlignment.last:
to_add_index = pd.date_range(end=timestamp.min(), periods=num_to_add + 1, closed="left", freq=freq)

df = df.append(pd.DataFrame(None, index=to_add_index)).sort_index()
df = pd.concat((df, pd.DataFrame(None, index=to_add_index))).sort_index()

return df

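For context, the changed line in `_prepare_seasonal_plot_df` pads the seasonal-plot frame with extra timestamps by concatenating an all-NaN frame built on the missing index and then sorting. A small sketch of that behaviour with made-up data:

```python
import pandas as pd

# Frame with data starting a few days in; the plot needs earlier timestamps too.
df = pd.DataFrame({"target": [1.0, 2.0, 3.0]},
                  index=pd.date_range("2021-01-04", periods=3, freq="D"))
to_add_index = pd.date_range("2021-01-01", periods=3, freq="D")

# Concatenating an empty frame indexed by the missing dates adds all-NaN rows.
padded = pd.concat((df, pd.DataFrame(None, index=to_add_index))).sort_index()
print(padded)  # rows for 2021-01-01..2021-01-03 appear with NaN target
```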
4 changes: 2 additions & 2 deletions etna/analysis/plotters.py
@@ -343,9 +343,9 @@ def plot_backtest(

# plot history
if history_len == "all":
plot_df = segment_history_df.append(segment_backtest_df)
plot_df = pd.concat((segment_history_df, segment_backtest_df))
elif history_len > 0:
plot_df = segment_history_df.tail(history_len).append(segment_backtest_df)
plot_df = pd.concat((segment_history_df.tail(history_len), segment_backtest_df))
else:
plot_df = segment_backtest_df
ax[i].plot(plot_df.index, plot_df.target, color=lines_colors["history"])
11 changes: 6 additions & 5 deletions etna/pipeline/base.py
@@ -411,13 +411,13 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame
"""Get dataframe with metrics."""
if self._folds is None:
raise ValueError("Something went wrong during backtest initialization!")
metrics_df = pd.DataFrame()
metrics_dfs = []

for i, fold in self._folds.items():
fold_metrics = pd.DataFrame(fold["metrics"]).reset_index().rename({"index": "segment"}, axis=1)
fold_metrics[self._fold_column] = i
metrics_df = metrics_df.append(fold_metrics)

metrics_dfs.append(fold_metrics)
metrics_df = pd.concat(metrics_dfs)
metrics_df.sort_values(["segment", self._fold_column], inplace=True)

if aggregate_metrics:
@@ -429,14 +429,15 @@ def _get_fold_info(self) -> pd.DataFrame:
"""Get information about folds."""
if self._folds is None:
raise ValueError("Something went wrong during backtest initialization!")
timerange_df = pd.DataFrame()
timerange_dfs = []
for fold_number, fold_info in self._folds.items():
tmp_df = pd.DataFrame()
for stage_name in ("train", "test"):
for border in ("start", "end"):
tmp_df[f"{stage_name}_{border}_time"] = [fold_info[f"{stage_name}_timerange"][border]]
tmp_df[self._fold_column] = fold_number
timerange_df = timerange_df.append(tmp_df)
timerange_dfs.append(tmp_df)
timerange_df = pd.concat(timerange_dfs)
return timerange_df

def _get_backtest_forecasts(self) -> pd.DataFrame:
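The change in `etna/pipeline/base.py` also switches the idiom: instead of re-copying a growing DataFrame with `append` on every loop iteration, per-fold frames are collected in a list and concatenated once at the end. A sketch of that accumulate-then-concat pattern with hypothetical fold data (column names are illustrative, not the pipeline's actual attributes):

```python
import pandas as pd

# Hypothetical per-fold metric values.
folds = {0: {"MAE": [1.2, 0.8]}, 1: {"MAE": [1.1, 0.9]}}

metrics_dfs = []
for i, fold_metrics in folds.items():
    fold_df = pd.DataFrame(fold_metrics)
    fold_df["fold_number"] = i  # illustrative fold column
    metrics_dfs.append(fold_df)

# Single concatenation at the end instead of repeated DataFrame.append calls.
metrics_df = pd.concat(metrics_dfs)
print(metrics_df)
```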
17 changes: 10 additions & 7 deletions tests/conftest.py
@@ -42,12 +42,13 @@ def two_dfs_with_different_timestamps(random_seed):
"""Generate two dataframes with the same segments and different timestamps"""

def generate_df(start_time):
df = pd.DataFrame()
dfs = []
for i in range(5):
tmp = pd.DataFrame({"timestamp": pd.date_range(start_time, "2021-01-01")})
tmp["segment"] = f"segment_{i + 1}"
tmp["target"] = np.random.uniform(0, 10, len(tmp))
df = df.append(tmp)
dfs.append(tmp)
df = pd.concat(dfs)
df = df.pivot(index="timestamp", columns="segment")
df = df.reorder_levels([1, 0], axis=1)
df = df.sort_index(axis=1)
@@ -65,12 +66,13 @@ def two_dfs_with_different_segments_sets(random_seed):
"""Generate two dataframes with the same timestamps and different segments"""

def generate_df(n_segments):
df = pd.DataFrame()
dfs = []
for i in range(n_segments):
tmp = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", "2021-01-01")})
tmp["segment"] = f"segment_{i + 1}"
tmp["target"] = np.random.uniform(0, 10, len(tmp))
df = df.append(tmp)
dfs.append(tmp)
df = pd.concat(dfs)
df = df.pivot(index="timestamp", columns="segment")
df = df.reorder_levels([1, 0], axis=1)
df = df.sort_index(axis=1)
@@ -88,12 +90,13 @@ def train_test_dfs(random_seed):
"""Generate two dataframes with the same segments and the same timestamps"""

def generate_df():
df = pd.DataFrame()
dfs = []
for i in range(5):
tmp = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", "2021-01-01")})
tmp["segment"] = f"segment_{i + 1}"
tmp["target"] = np.random.uniform(0, 10, len(tmp))
df = df.append(tmp)
dfs.append(tmp)
df = pd.concat(dfs)
df = df.pivot(index="timestamp", columns="segment")
df = df.reorder_levels([1, 0], axis=1)
df = df.sort_index(axis=1)
@@ -316,7 +319,7 @@ def imbalanced_tsdf(random_seed) -> TSDataset:
df2["segment"] = "segment_2"
df2["target"] = np.random.uniform(0, 5, len(df2))

df = df1.append(df2)
df = pd.concat((df1, df2))
df = df.pivot(index="timestamp", columns="segment").reorder_levels([1, 0], axis=1).sort_index(axis=1)
df.columns.names = ["segment", "feature"]
ts = TSDataset(df, freq="D")
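The conftest fixtures build one long frame per segment, concatenate them, and then pivot to the wide layout with a `(segment, feature)` column MultiIndex. A self-contained sketch of that reshaping step, using made-up data:

```python
import numpy as np
import pandas as pd

dfs = []
for i in range(2):
    tmp = pd.DataFrame({"timestamp": pd.date_range("2021-01-01", periods=3)})
    tmp["segment"] = f"segment_{i + 1}"
    tmp["target"] = np.random.uniform(0, 10, len(tmp))
    dfs.append(tmp)

# Long format -> wide format: one column per (segment, feature) pair.
df = pd.concat(dfs)
wide = df.pivot(index="timestamp", columns="segment")
wide = wide.reorder_levels([1, 0], axis=1).sort_index(axis=1)
print(wide.columns)  # MultiIndex with segment as the outer level
```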
5 changes: 3 additions & 2 deletions tests/test_clustering/test_clustering.py
@@ -12,14 +12,15 @@

@pytest.fixture
def eucl_ts(random_seed) -> TSDataset:
df = pd.DataFrame()
dfs = []
for i in range(1, 8):
date_range = pd.date_range("2020-01-01", "2020-05-01")
for j, sigma in enumerate([0.1, 0.3, 0.5, 0.8]):
tmp = pd.DataFrame({"timestamp": date_range})
tmp["segment"] = f"{i}{j}"
tmp["target"] = np.random.normal(i, sigma, len(tmp))
df = df.append(tmp, ignore_index=True)
dfs.append(tmp)
df = pd.concat(dfs, ignore_index=True)
ts = TSDataset(df=TSDataset.to_dataset(df), freq="D")
return ts

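The clustering fixtures previously relied on `append(..., ignore_index=True)`; `pd.concat` accepts the same `ignore_index=True` flag, so the fixture still ends up with a fresh RangeIndex. A toy illustration:

```python
import pandas as pd

a = pd.DataFrame({"target": [1.0, 2.0]})
b = pd.DataFrame({"target": [3.0]})

# ignore_index=True renumbers the result 0..n-1, matching append's behaviour.
combined = pd.concat([a, b], ignore_index=True)
print(combined.index)  # RangeIndex(start=0, stop=3, step=1)
```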
5 changes: 3 additions & 2 deletions tests/test_clustering/test_distances.py
@@ -34,13 +34,14 @@ def pattern():
@pytest.fixture
def dtw_ts(pattern) -> TSDataset:
"""Get df with complex pattern with timestamp lag."""
df = pd.DataFrame()
dfs = []
for i in range(1, 8):
date_range = pd.date_range(f"2020-01-0{str(i)}", periods=35)
tmp = pd.DataFrame({"timestamp": date_range})
tmp["segment"] = str(i)
tmp["target"] = pattern
df = df.append(tmp, ignore_index=True)
dfs.append(tmp)
df = pd.concat(dfs, ignore_index=True)
ts = TSDataset(df=TSDataset.to_dataset(df), freq="D")
return ts

2 changes: 1 addition & 1 deletion tests/test_datasets/test_dataset.py
@@ -598,7 +598,7 @@ def test_to_flatten_with_exog(df_and_regressors_flat):
expected_df = flat_df[sorted_columns]
# add values to absent timestamps at one segment
to_append = pd.DataFrame({"timestamp": df["timestamp"][:5], "segment": ["2"] * 5})
expected_df = expected_df.append(to_append).sort_values(by=["segment", "timestamp"]).reset_index(drop=True)
expected_df = pd.concat((expected_df, to_append)).sort_values(by=["segment", "timestamp"]).reset_index(drop=True)
# rebuild category type according to new values

obtained_df = TSDataset.to_flatten(TSDataset.to_dataset(flat_df))[sorted_columns].sort_values(
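In `test_to_flatten_with_exog` the appended frame carries only the `timestamp` and `segment` columns; like `append`, `pd.concat` aligns on column names and fills the missing columns with NaN for those rows. A hypothetical illustration of that alignment:

```python
import pandas as pd

expected_df = pd.DataFrame({"timestamp": pd.date_range("2021-01-01", periods=2),
                            "segment": ["1", "1"],
                            "target": [1.0, 2.0]})
# Frame with a subset of the columns, as in the test.
to_append = pd.DataFrame({"timestamp": pd.date_range("2021-01-01", periods=2),
                          "segment": ["2", "2"]})

result = pd.concat((expected_df, to_append)).sort_values(
    by=["segment", "timestamp"]).reset_index(drop=True)
print(result)  # "target" is NaN for the rows of segment "2"
```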
