diff --git a/CHANGELOG.md b/CHANGELOG.md index fe94679b6..ec5931814 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,7 +39,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - - Fix missing prophet in docker images ([#767](https://github.com/tinkoff-ai/etna/pull/767)) - Add `known_future` parameter to CLI ([#758](https://github.com/tinkoff-ai/etna/pull/758)) -- +- FutureWarning: The frame.append method is deprecated. Use pandas.concat instead ([#764](https://github.com/tinkoff-ai/etna/pull/764)) - - - diff --git a/etna/analysis/eda_utils.py b/etna/analysis/eda_utils.py index b92d87a90..d75f2b40b 100644 --- a/etna/analysis/eda_utils.py +++ b/etna/analysis/eda_utils.py @@ -743,7 +743,7 @@ def _prepare_seasonal_plot_df( elif SeasonalPlotAlignment(alignment) == SeasonalPlotAlignment.last: to_add_index = pd.date_range(end=timestamp.min(), periods=num_to_add + 1, closed="left", freq=freq) - df = df.append(pd.DataFrame(None, index=to_add_index)).sort_index() + df = pd.concat((df, pd.DataFrame(None, index=to_add_index))).sort_index() return df diff --git a/etna/analysis/plotters.py b/etna/analysis/plotters.py index 9c1fe5e9f..73efeb2ff 100644 --- a/etna/analysis/plotters.py +++ b/etna/analysis/plotters.py @@ -343,9 +343,9 @@ def plot_backtest( # plot history if history_len == "all": - plot_df = segment_history_df.append(segment_backtest_df) + plot_df = pd.concat((segment_history_df, segment_backtest_df)) elif history_len > 0: - plot_df = segment_history_df.tail(history_len).append(segment_backtest_df) + plot_df = pd.concat((segment_history_df.tail(history_len), segment_backtest_df)) else: plot_df = segment_backtest_df ax[i].plot(plot_df.index, plot_df.target, color=lines_colors["history"]) diff --git a/etna/pipeline/base.py b/etna/pipeline/base.py index fdad36708..89a446673 100644 --- a/etna/pipeline/base.py +++ b/etna/pipeline/base.py @@ -411,13 +411,13 @@ def _get_backtest_metrics(self, aggregate_metrics: bool = False) -> pd.DataFrame """Get dataframe with metrics.""" if self._folds is None: raise ValueError("Something went wrong during backtest initialization!") - metrics_df = pd.DataFrame() + metrics_dfs = [] for i, fold in self._folds.items(): fold_metrics = pd.DataFrame(fold["metrics"]).reset_index().rename({"index": "segment"}, axis=1) fold_metrics[self._fold_column] = i - metrics_df = metrics_df.append(fold_metrics) - + metrics_dfs.append(fold_metrics) + metrics_df = pd.concat(metrics_dfs) metrics_df.sort_values(["segment", self._fold_column], inplace=True) if aggregate_metrics: @@ -429,14 +429,15 @@ def _get_fold_info(self) -> pd.DataFrame: """Get information about folds.""" if self._folds is None: raise ValueError("Something went wrong during backtest initialization!") - timerange_df = pd.DataFrame() + timerange_dfs = [] for fold_number, fold_info in self._folds.items(): tmp_df = pd.DataFrame() for stage_name in ("train", "test"): for border in ("start", "end"): tmp_df[f"{stage_name}_{border}_time"] = [fold_info[f"{stage_name}_timerange"][border]] tmp_df[self._fold_column] = fold_number - timerange_df = timerange_df.append(tmp_df) + timerange_dfs.append(tmp_df) + timerange_df = pd.concat(timerange_dfs) return timerange_df def _get_backtest_forecasts(self) -> pd.DataFrame: diff --git a/tests/conftest.py b/tests/conftest.py index a76d44c64..5f2e1d2a2 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,12 +42,13 @@ def two_dfs_with_different_timestamps(random_seed): """Generate two dataframes with the same segments and different timestamps""" def generate_df(start_time): - df = pd.DataFrame() + dfs = [] for i in range(5): tmp = pd.DataFrame({"timestamp": pd.date_range(start_time, "2021-01-01")}) tmp["segment"] = f"segment_{i + 1}" tmp["target"] = np.random.uniform(0, 10, len(tmp)) - df = df.append(tmp) + dfs.append(tmp) + df = pd.concat(dfs) df = df.pivot(index="timestamp", columns="segment") df = df.reorder_levels([1, 0], axis=1) df = df.sort_index(axis=1) @@ -65,12 +66,13 @@ def two_dfs_with_different_segments_sets(random_seed): """Generate two dataframes with the same timestamps and different segments""" def generate_df(n_segments): - df = pd.DataFrame() + dfs = [] for i in range(n_segments): tmp = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", "2021-01-01")}) tmp["segment"] = f"segment_{i + 1}" tmp["target"] = np.random.uniform(0, 10, len(tmp)) - df = df.append(tmp) + dfs.append(tmp) + df = pd.concat(dfs) df = df.pivot(index="timestamp", columns="segment") df = df.reorder_levels([1, 0], axis=1) df = df.sort_index(axis=1) @@ -88,12 +90,13 @@ def train_test_dfs(random_seed): """Generate two dataframes with the same segments and the same timestamps""" def generate_df(): - df = pd.DataFrame() + dfs = [] for i in range(5): tmp = pd.DataFrame({"timestamp": pd.date_range("2020-01-01", "2021-01-01")}) tmp["segment"] = f"segment_{i + 1}" tmp["target"] = np.random.uniform(0, 10, len(tmp)) - df = df.append(tmp) + dfs.append(tmp) + df = pd.concat(dfs) df = df.pivot(index="timestamp", columns="segment") df = df.reorder_levels([1, 0], axis=1) df = df.sort_index(axis=1) @@ -316,7 +319,7 @@ def imbalanced_tsdf(random_seed) -> TSDataset: df2["segment"] = "segment_2" df2["target"] = np.random.uniform(0, 5, len(df2)) - df = df1.append(df2) + df = pd.concat((df1, df2)) df = df.pivot(index="timestamp", columns="segment").reorder_levels([1, 0], axis=1).sort_index(axis=1) df.columns.names = ["segment", "feature"] ts = TSDataset(df, freq="D") diff --git a/tests/test_clustering/test_clustering.py b/tests/test_clustering/test_clustering.py index b12213442..6efd09873 100644 --- a/tests/test_clustering/test_clustering.py +++ b/tests/test_clustering/test_clustering.py @@ -12,14 +12,15 @@ @pytest.fixture def eucl_ts(random_seed) -> TSDataset: - df = pd.DataFrame() + dfs = [] for i in range(1, 8): date_range = pd.date_range("2020-01-01", "2020-05-01") for j, sigma in enumerate([0.1, 0.3, 0.5, 0.8]): tmp = pd.DataFrame({"timestamp": date_range}) tmp["segment"] = f"{i}{j}" tmp["target"] = np.random.normal(i, sigma, len(tmp)) - df = df.append(tmp, ignore_index=True) + dfs.append(tmp) + df = pd.concat(dfs, ignore_index=True) ts = TSDataset(df=TSDataset.to_dataset(df), freq="D") return ts diff --git a/tests/test_clustering/test_distances.py b/tests/test_clustering/test_distances.py index a1e550a31..36365abe6 100644 --- a/tests/test_clustering/test_distances.py +++ b/tests/test_clustering/test_distances.py @@ -34,13 +34,14 @@ def pattern(): @pytest.fixture def dtw_ts(pattern) -> TSDataset: """Get df with complex pattern with timestamp lag.""" - df = pd.DataFrame() + dfs = [] for i in range(1, 8): date_range = pd.date_range(f"2020-01-0{str(i)}", periods=35) tmp = pd.DataFrame({"timestamp": date_range}) tmp["segment"] = str(i) tmp["target"] = pattern - df = df.append(tmp, ignore_index=True) + dfs.append(tmp) + df = pd.concat(dfs, ignore_index=True) ts = TSDataset(df=TSDataset.to_dataset(df), freq="D") return ts diff --git a/tests/test_datasets/test_dataset.py b/tests/test_datasets/test_dataset.py index ed2fcf84d..944488eef 100644 --- a/tests/test_datasets/test_dataset.py +++ b/tests/test_datasets/test_dataset.py @@ -598,7 +598,7 @@ def test_to_flatten_with_exog(df_and_regressors_flat): expected_df = flat_df[sorted_columns] # add values to absent timestamps at one segment to_append = pd.DataFrame({"timestamp": df["timestamp"][:5], "segment": ["2"] * 5}) - expected_df = expected_df.append(to_append).sort_values(by=["segment", "timestamp"]).reset_index(drop=True) + expected_df = pd.concat((expected_df, to_append)).sort_values(by=["segment", "timestamp"]).reset_index(drop=True) # rebuild category type according to new values obtained_df = TSDataset.to_flatten(TSDataset.to_dataset(flat_df))[sorted_columns].sort_values(