From 8334f005852267ba8483f7c07530380dce2e8882 Mon Sep 17 00:00:00 2001
From: Oleksandr Shchur
Date: Wed, 17 Jan 2024 15:09:45 +0000
Subject: [PATCH 1/2] Simplify TimeSeriesCloudPredictor API

---
 docs/index.md                                   | 26 +++-----
 .../predictor/timeseries_cloud_predictor.py     | 62 ++++++++++---------
 tests/unittests/timeseries/test_timeseries.py   | 17 +----
 3 files changed, 43 insertions(+), 62 deletions(-)

diff --git a/docs/index.md b/docs/index.md
index 261868ac..c7d7301a 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -111,39 +111,27 @@ import pandas as pd
 from autogluon.cloud import TimeSeriesCloudPredictor
 
 data = pd.read_csv("https://autogluon.s3.amazonaws.com/datasets/cloud/timeseries_train.csv")
-id_column="item_id"
-timestamp_column="timestamp"
-target="target"
 
 predictor_init_args = {
-    "target": target
+    "target": "target",
+    "prediction_length": 24,
 }  # args used when creating TimeSeriesPredictor()
 predictor_fit_args = {
     "train_data": data,
-    "time_limit": 120
+    "time_limit": 120,
 }  # args passed to TimeSeriesPredictor.fit()
 cloud_predictor = TimeSeriesCloudPredictor(cloud_output_path="YOUR_S3_BUCKET_PATH")
 cloud_predictor.fit(
     predictor_init_args=predictor_init_args,
     predictor_fit_args=predictor_fit_args,
-    id_column=id_column,
-    timestamp_column=timestamp_column
+    id_column="item_id",
+    timestamp_column="timestamp",
 )
 cloud_predictor.deploy()
-result = cloud_predictor.predict_real_time(
-    test_data=data,
-    id_column=id_column,
-    timestamp_column=timestamp_column,
-    target=target
-)
+result = cloud_predictor.predict_real_time(data)
 cloud_predictor.cleanup_deployment()
 
 # Batch inference
-result = cloud_predictor.predict(
-    test_data=data,
-    id_column=id_column,
-    timestamp_column=timestamp_column,
-    target=target
-)
+result = cloud_predictor.predict(data)
 ```
 :::

diff --git a/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py b/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py
index 9d3ea66c..e5018e94 100644
--- a/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py
+++ b/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py
@@ -15,6 +15,23 @@ class TimeSeriesCloudPredictor(CloudPredictor):
     predictor_file_name = "TimeSeriesCloudPredictor.pkl"
     backend_map = {SAGEMAKER: TIMESERIES_SAGEMAKER}
 
+    def __init__(
+        self,
+        local_output_path: Optional[str] = None,
+        cloud_output_path: Optional[str] = None,
+        backend: str = SAGEMAKER,
+        verbosity: int = 2,
+    ) -> None:
+        super().__init__(
+            local_output_path=local_output_path,
+            cloud_output_path=cloud_output_path,
+            backend=backend,
+            verbosity=verbosity,
+        )
+        self.target_column: Optional[str] = None
+        self.id_column: Optional[str] = None
+        self.timestamp_column: Optional[str] = None
+
     @property
     def predictor_type(self):
         """
@@ -33,8 +50,8 @@ def fit(
         *,
         predictor_init_args: Dict[str, Any],
         predictor_fit_args: Dict[str, Any],
-        id_column: str,
-        timestamp_column: str,
+        id_column: str = "item_id",
+        timestamp_column: str = "timestamp",
         static_features: Optional[Union[str, pd.DataFrame]] = None,
         framework_version: str = "latest",
         job_name: Optional[str] = None,
@@ -56,10 +73,10 @@ def fit(
             Init args for the predictor
         predictor_fit_args: dict
             Fit args for the predictor
-        id_column: str
-            Name of the 'item_id' column
-        timestamp_column: str
-            Name of the 'timestamp' column
+        id_column: str, default = "item_id"
+            Name of the item ID column
+        timestamp_column: str, default = "timestamp"
+            Name of the timestamp column
         static_features: Optional[pd.DataFrame]
             An optional data frame describing the metadata attributes of
             individual items in the item index. For more detail, please refer to `TimeSeriesDataFrame` documentation:
@@ -102,6 +119,11 @@ def fit(
         ), "Predictor is already fit! To fit additional models, create a new `CloudPredictor`"
         if backend_kwargs is None:
             backend_kwargs = {}
+
+        self.target_column = predictor_init_args.get("target", "target")
+        self.id_column = id_column
+        self.timestamp_column = timestamp_column
+
         backend_kwargs = self.backend.parse_backend_fit_kwargs(backend_kwargs)
         self.backend.fit(
             predictor_init_args=predictor_init_args,
@@ -124,9 +146,6 @@ def fit(
     def predict_real_time(
         self,
         test_data: Union[str, pd.DataFrame],
-        id_column: str,
-        timestamp_column: str,
-        target: str,
         static_features: Optional[Union[str, pd.DataFrame]] = None,
         accept: str = "application/x-parquet",
         **kwargs,
@@ -141,16 +160,10 @@ def predict_real_time(
         test_data: Union(str, pandas.DataFrame)
             The test data to be inferenced.
             Can be a pandas.DataFrame or a local path to a csv file.
-        id_column: str
-            Name of the 'item_id' column
-        timestamp_column: str
-            Name of the 'timestamp' column
         static_features: Optional[pd.DataFrame]
             An optional data frame describing the metadata attributes of individual items in the item index.
             For more detail, please refer to `TimeSeriesDataFrame` documentation:
             https://auto.gluon.ai/stable/api/autogluon.predictor.html#timeseriesdataframe
-        target: str
-            Name of column that contains the target values to forecast
         accept: str, default = application/x-parquet
             Type of accept output content.
             Valid options are application/x-parquet, text/csv, application/json
@@ -164,9 +177,9 @@ def predict_real_time(
         """
         return self.backend.predict_real_time(
             test_data=test_data,
-            id_column=id_column,
-            timestamp_column=timestamp_column,
-            target=target,
+            id_column=self.id_column,
+            timestamp_column=self.timestamp_column,
+            target=self.target_column,
             static_features=static_features,
             accept=accept,
         )
@@ -177,9 +190,6 @@ def predict_proba_real_time(self, **kwargs) -> pd.DataFrame:
     def predict(
         self,
         test_data: Union[str, pd.DataFrame],
-        id_column: str,
-        timestamp_column: str,
-        target: str,
         static_features: Optional[Union[str, pd.DataFrame]] = None,
         predictor_path: Optional[str] = None,
         framework_version: str = "latest",
@@ -203,10 +213,6 @@ def predict(
         test_data: str
             The test data to be inferenced.
             Can be a pandas.DataFrame or a local path to a csv file.
-        id_column: str
-            Name of the 'item_id' column
-        timestamp_column: str
-            Name of the 'timestamp' column
         static_features: Optional[Union[str, pd.DataFrame]]
             An optional data frame describing the metadata attributes of individual items in the item index.
             For more detail, please refer to `TimeSeriesDataFrame` documentation:
@@ -262,9 +268,9 @@ def predict(
         backend_kwargs = self.backend.parse_backend_predict_kwargs(backend_kwargs)
         return self.backend.predict(
             test_data=test_data,
-            id_column=id_column,
-            timestamp_column=timestamp_column,
-            target=target,
+            id_column=self.id_column,
+            timestamp_column=self.timestamp_column,
+            target=self.target_column,
             static_features=static_features,
             predictor_path=predictor_path,
             framework_version=framework_version,

diff --git a/tests/unittests/timeseries/test_timeseries.py b/tests/unittests/timeseries/test_timeseries.py
index d9ecc58a..dd58a238 100644
--- a/tests/unittests/timeseries/test_timeseries.py
+++ b/tests/unittests/timeseries/test_timeseries.py
@@ -7,16 +7,13 @@ def test_timeseries(test_helper, framework_version):
     train_data = "timeseries_train.csv"
     static_features = "timeseries_static_features.csv"
-    id_column = "item_id"
-    timestamp_column = "timestamp"
-    target = "target"
     timestamp = test_helper.get_utc_timestamp_now()
     with tempfile.TemporaryDirectory() as temp_dir:
         os.chdir(temp_dir)
         test_helper.prepare_data(train_data, static_features)
         time_limit = 60
-        predictor_init_args = dict(target=target)
+        predictor_init_args = dict(target="target", prediction_length=3)
         predictor_fit_args = dict(
             train_data=train_data,
@@ -35,25 +32,15 @@ def test_timeseries(test_helper, framework_version):
             predictor_fit_args,
             train_data,
             fit_kwargs=dict(
-                id_column=id_column,
-                timestamp_column=timestamp_column,
                 static_features=static_features,
                 framework_version=framework_version,
                 custom_image_uri=training_custom_image_uri,
             ),
             deploy_kwargs=dict(framework_version=framework_version, custom_image_uri=inference_custom_image_uri),
             predict_kwargs=dict(
-                id_column=id_column,
-                timestamp_column=timestamp_column,
-                target=target,
                 static_features=static_features,
                 framework_version=framework_version,
                 custom_image_uri=inference_custom_image_uri,
             ),
-            predict_real_time_kwargs=dict(
-                id_column=id_column,
-                timestamp_column=timestamp_column,
-                target=target,
-                static_features=static_features,
-            ),
+            predict_real_time_kwargs=dict(static_features=static_features),
         )

From 20309c05d8452985bac78d50becd4514e4b4d184 Mon Sep 17 00:00:00 2001
From: Oleksandr Shchur
Date: Thu, 18 Jan 2024 14:42:40 +0000
Subject: [PATCH 2/2] Use bigger dataset in demo

---
 docs/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/index.md b/docs/index.md
index c7d7301a..3e1ad0ad 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -110,7 +110,7 @@ result = cloud_predictor.predict(test_data)
 import pandas as pd
 from autogluon.cloud import TimeSeriesCloudPredictor
 
-data = pd.read_csv("https://autogluon.s3.amazonaws.com/datasets/cloud/timeseries_train.csv")
+data = pd.read_csv("https://autogluon.s3.amazonaws.com/datasets/timeseries/m4_hourly_tiny/train.csv")
 
 predictor_init_args = {
     "target": "target",
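Note for reviewers: below is a minimal sketch of how the simplified workflow reads once both patches are applied. The inline toy DataFrame, the bucket placeholder, and the chosen `prediction_length` are illustrative assumptions rather than part of the patch; the point is that `id_column`, `timestamp_column`, and the target name are now captured once at `fit()` time (defaulting to "item_id" / "timestamp" / "target") and reused by `predict_real_time()` and `predict()`.

```python
import pandas as pd

from autogluon.cloud import TimeSeriesCloudPredictor

# Toy long-format data that already uses the default column names ("item_id", "timestamp", "target").
data = pd.DataFrame(
    {
        "item_id": ["A"] * 100 + ["B"] * 100,
        "timestamp": list(pd.date_range("2024-01-01", periods=100, freq="D")) * 2,
        "target": list(range(200)),
    }
)

predictor_init_args = {"target": "target", "prediction_length": 24}  # passed to TimeSeriesPredictor()
predictor_fit_args = {"train_data": data, "time_limit": 120}  # passed to TimeSeriesPredictor.fit()

cloud_predictor = TimeSeriesCloudPredictor(cloud_output_path="YOUR_S3_BUCKET_PATH")
cloud_predictor.fit(
    predictor_init_args=predictor_init_args,
    predictor_fit_args=predictor_fit_args,
    # id_column / timestamp_column can be omitted here; they now default to "item_id" / "timestamp".
)

# Real-time inference: column names no longer need to be repeated per call.
cloud_predictor.deploy()
forecast = cloud_predictor.predict_real_time(data)
cloud_predictor.cleanup_deployment()

# Batch inference reuses the same stored column configuration.
forecast = cloud_predictor.predict(data)
```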