diff --git a/src/autogluon/cloud/__init__.py b/src/autogluon/cloud/__init__.py index e7d4a7a..8e8ccea 100644 --- a/src/autogluon/cloud/__init__.py +++ b/src/autogluon/cloud/__init__.py @@ -1,5 +1,5 @@ from autogluon.common.utils.log_utils import _add_stream_handler -from .predictor import MultiModalCloudPredictor, TabularCloudPredictor +from .predictor import MultiModalCloudPredictor, TabularCloudPredictor, TimeSeriesCloudPredictor _add_stream_handler() diff --git a/src/autogluon/cloud/predictor/__init__.py b/src/autogluon/cloud/predictor/__init__.py index 3b7eca0..1b128e3 100644 --- a/src/autogluon/cloud/predictor/__init__.py +++ b/src/autogluon/cloud/predictor/__init__.py @@ -1,2 +1,3 @@ from .multimodal_cloud_predictor import MultiModalCloudPredictor from .tabular_cloud_predictor import TabularCloudPredictor +from .timeseries_cloud_predictor import TimeSeriesCloudPredictor diff --git a/src/autogluon/cloud/predictor/cloud_predictor.py b/src/autogluon/cloud/predictor/cloud_predictor.py index 2580aeb..717ee87 100644 --- a/src/autogluon/cloud/predictor/cloud_predictor.py +++ b/src/autogluon/cloud/predictor/cloud_predictor.py @@ -787,10 +787,13 @@ def _load_predict_real_time_test_data(self, test_data): return test_data - def _predict_real_time(self, test_data, accept, **initial_args): + def _predict_real_time(self, test_data, accept, split_pred_proba=True, **initial_args): try: prediction = self.endpoint.predict(test_data, initial_args={"Accept": accept, **initial_args}) - pred, pred_proba = split_pred_and_pred_proba(prediction) + pred, pred_proba = None, None + pred = prediction + if split_pred_proba: + pred, pred_proba = split_pred_and_pred_proba(prediction) return pred, pred_proba except ClientError as e: if e.response["Error"]["Code"] == "413": # Error code for pay load too large @@ -904,6 +907,7 @@ def _predict( save_path=None, model_kwargs=None, transformer_kwargs=None, + split_pred_proba=True, **kwargs, ): if not predictor_path: @@ -1024,7 +1028,9 @@ def 
_predict( results_path = self.download_predict_results(save_path=save_path) # Batch inference will only return json format results = pd.read_json(results_path) - pred, pred_proba = split_pred_and_pred_proba(results) + pred = results + if split_pred_proba: + pred, pred_proba = split_pred_and_pred_proba(results) if not persist: os.remove(results_path) @@ -1052,6 +1058,7 @@ def predict( Predict using SageMaker batch transform. When minimizing latency isn't a concern, then the batch transform functionality may be easier, more scalable, and more appropriate. If you want to minimize latency, use `predict_real_time()` instead. + To learn more: https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html This method would first create a AutoGluonSagemakerInferenceModel with the trained predictor, then create a transformer with it, and call transform in the end. @@ -1152,6 +1159,7 @@ def predict_proba( Predict using SageMaker batch transform. When minimizing latency isn't a concern, then the batch transform functionality may be easier, more scalable, and more appropriate. If you want to minimize latency, use `predict_real_time()` instead. + To learn more: https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html This method would first create a AutoGluonSagemakerInferenceModel with the trained predictor, then create a transformer with it, and call transform in the end. diff --git a/src/autogluon/cloud/predictor/multimodal_cloud_predictor.py b/src/autogluon/cloud/predictor/multimodal_cloud_predictor.py index 750ab4f..9aa010a 100644 --- a/src/autogluon/cloud/predictor/multimodal_cloud_predictor.py +++ b/src/autogluon/cloud/predictor/multimodal_cloud_predictor.py @@ -163,6 +163,15 @@ def predict( **kwargs, ) -> Optional[pd.Series]: """ + Predict using SageMaker batch transform. + When minimizing latency isn't a concern, then the batch transform functionality may be easier, more scalable, and more appropriate. 
+ If you want to minimize latency, use `predict_real_time()` instead. + To learn more: https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html + This method would first create a AutoGluonSagemakerInferenceModel with the trained predictor, + then create a transformer with it, and call transform in the end. + + Parameters + ---------- test_data: str The test data to be inferenced. Can be a pandas.DataFrame or a local path to a csv file. @@ -202,6 +211,13 @@ def predict_proba( **kwargs, ) -> Optional[Union[Tuple[pd.Series, Union[pd.DataFrame, pd.Series]], Union[pd.DataFrame, pd.Series]]]: """ + Predict using SageMaker batch transform. + When minimizing latency isn't a concern, then the batch transform functionality may be easier, more scalable, and more appropriate. + If you want to minimize latency, use `predict_real_time()` instead. + To learn more: https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html + This method would first create a AutoGluonSagemakerInferenceModel with the trained predictor, + then create a transformer with it, and call transform in the end. + test_data: str The test data to be inferenced. Can be a pandas.DataFrame or a local path to a csv file. 
diff --git a/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py b/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py new file mode 100644 index 0000000..e391ac8 --- /dev/null +++ b/src/autogluon/cloud/predictor/timeseries_cloud_predictor.py @@ -0,0 +1,271 @@ +from __future__ import annotations + +import copy +import logging +from typing import Any, Dict, Optional, Union + +import pandas as pd + +from autogluon.common.loaders import load_pd + +from .cloud_predictor import CloudPredictor + +logger = logging.getLogger(__name__) + + +class TimeSeriesCloudPredictor(CloudPredictor): + predictor_file_name = "TimeSeriesCloudPredictor.pkl" + + @property + def predictor_type(self): + """ + Type of the underneath AutoGluon Predictor + """ + return "timeseries" + + def _get_local_predictor_cls(self): + from autogluon.timeseries import TimeSeriesPredictor + + predictor_cls = TimeSeriesPredictor + return predictor_cls + + def _preprocess_data( + self, + data: Union[pd.DataFrame, str], + id_column: str, + timestamp_column: str, + target: str, + static_features: Optional[Union[pd.DataFrame, str]] = None, + ) -> pd.DataFrame: + if isinstance(data, str): + data = load_pd.load(data) + else: + data = copy.copy(data) + cols = data.columns.to_list() + # Make sure id and timestamp columns are the first two columns, and target column is in the end + # This is to ensure in the container we know how to find id and timestamp columns, and whether there are static features being merged + timestamp_index = cols.index(timestamp_column) + cols.insert(0, cols.pop(timestamp_index)) + id_index = cols.index(id_column) + cols.insert(0, cols.pop(id_index)) + target_index = cols.index(target) + cols.append(cols.pop(target_index)) + data = data[cols] + + if static_features is not None: + # Merge static features so only one dataframe needs to be sent to remote container + if isinstance(static_features, str): + static_features = load_pd.load(static_features) + data = pd.merge(data, 
static_features, how="left", on=id_column) + + return data + + def fit( + self, + *, + predictor_init_args: Dict[str, Any], + predictor_fit_args: Dict[str, Any], + id_column: str, + timestamp_column: str, + static_features: Optional[Union[str, pd.DataFrame]] = None, + framework_version: str = "latest", + job_name: Optional[str] = None, + instance_type: str = "ml.m5.2xlarge", + instance_count: int = 1, + volume_size: int = 100, + custom_image_uri: Optional[str] = None, + wait: bool = True, + autogluon_sagemaker_estimator_kwargs: Dict = None, + **kwargs, + ) -> TimeSeriesCloudPredictor: + """ + Fit the predictor with SageMaker. + This function will first upload necessary config and train data to s3 bucket. + Then launch a SageMaker training job with the AutoGluon training container. + + Parameters + ---------- + predictor_init_args: dict + Init args for the predictor + predictor_fit_args: dict + Fit args for the predictor + id_column: str + Name of the 'item_id' column + timestamp_column: str + Name of the 'timestamp' column + static_features: Optional[Union[str, pd.DataFrame]], default = None + An optional data frame describing the metadata attributes of individual items in the item index. + For more detail, please refer to `TimeSeriesDataFrame` documentation: + https://auto.gluon.ai/stable/api/autogluon.predictor.html#timeseriesdataframe + framework_version: str, default = `latest` + Training container version of autogluon. + If `latest`, will use the latest available container version. + If provided a specific version, will use this version. + If `custom_image_uri` is set, this argument will be ignored. + job_name: str, default = None + Name of the launched training job. + If None, CloudPredictor will create one with prefix ag-cloudpredictor + instance_type: str, default = 'ml.m5.2xlarge' + Instance type the predictor will be trained on with SageMaker. + instance_count: int, default = 1 + Number of instances used to fit the predictor. 
+ volume_size: int, default = 100 + Size in GB of the EBS volume to use for storing input data during training (default: 100). + Must be large enough to store training data if File Mode is used (which is the default). + wait: bool, default = True + Whether the call should wait until the job completes + To be noticed, the function won't return immediately because there are some preparations needed prior to fit. + Use `get_fit_job_status` to get job status. + autogluon_sagemaker_estimator_kwargs: dict, default = dict() + Any extra arguments needed to initialize AutoGluonSagemakerEstimator + Please refer to https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html#sagemaker.estimator.Framework for all options + **kwargs: + Any extra arguments needed to pass to fit. + Please refer to https://sagemaker.readthedocs.io/en/stable/api/training/estimators.html#sagemaker.estimator.Framework.fit for all options + + Returns + ------- + `TimeSeriesCloudPredictor` object. Returns self. + """ + predictor_fit_args = copy.deepcopy(predictor_fit_args) + train_data = predictor_fit_args.pop("train_data") + tuning_data = predictor_fit_args.pop("tuning_data", None) + target = predictor_init_args.get("target") + train_data = self._preprocess_data( + data=train_data, + id_column=id_column, + timestamp_column=timestamp_column, + target=target, + static_features=static_features, + ) + if tuning_data is not None: + tuning_data = self._preprocess_data( + data=tuning_data, + id_column=id_column, + timestamp_column=timestamp_column, + target=target, + static_features=static_features, + ) + predictor_fit_args["train_data"] = train_data + predictor_fit_args["tuning_data"] = tuning_data + print(train_data) + return super().fit( + predictor_init_args=predictor_init_args, + predictor_fit_args=predictor_fit_args, + framework_version=framework_version, + job_name=job_name, + instance_type=instance_type, + instance_count=instance_count, + volume_size=volume_size, + 
custom_image_uri=custom_image_uri, + wait=wait, + autogluon_sagemaker_estimator_kwargs=autogluon_sagemaker_estimator_kwargs, + **kwargs, + ) + + def predict_real_time( + self, + test_data: Union[str, pd.DataFrame], + id_column: str, + timestamp_column: str, + target: str, + static_features: Optional[Union[str, pd.DataFrame]] = None, + accept: str = "application/x-parquet", + ) -> pd.DataFrame: + """ + Predict with the deployed SageMaker endpoint. A deployed SageMaker endpoint is required. + This is intended to provide a low latency inference. + If you want to inference on a large dataset, use `predict()` instead. + + Parameters + ---------- + test_data: Union(str, pandas.DataFrame) + The test data to be inferenced. + Can be a pandas.DataFrame or a local path to a csv file. + id_column: str + Name of the 'item_id' column + timestamp_column: str + Name of the 'timestamp' column + static_features: Optional[Union[str, pd.DataFrame]], default = None + An optional data frame describing the metadata attributes of individual items in the item index. + For more detail, please refer to `TimeSeriesDataFrame` documentation: + https://auto.gluon.ai/stable/api/autogluon.predictor.html#timeseriesdataframe + target: str + Name of column that contains the target values to forecast + accept: str, default = application/x-parquet + Type of accept output content. 
+ Valid options are application/x-parquet, text/csv, application/json + + Returns + ------- + Pandas.DataFrame + Predict results in DataFrame + """ + self._validate_predict_real_time_args(accept) + test_data = self._preprocess_data( + data=test_data, + id_column=id_column, + timestamp_column=timestamp_column, + target=target, + static_features=static_features, + ) + pred, _ = self._predict_real_time(test_data=test_data, accept=accept, split_pred_proba=False) + return pred + + def predict_proba_real_time(self, **kwargs) -> pd.DataFrame: + raise ValueError(f"{self.__class__.__name__} does not support predict_proba operation.") + + def predict( + self, + test_data: Union[str, pd.DataFrame], + id_column: str, + timestamp_column: str, + target: str, + static_features: Optional[Union[str, pd.DataFrame]] = None, + **kwargs, + ) -> Optional[pd.DataFrame]: + """ + Predict using SageMaker batch transform. + When minimizing latency isn't a concern, then the batch transform functionality may be easier, more scalable, and more appropriate. + If you want to minimize latency, use `predict_real_time()` instead. + To learn more: https://docs.aws.amazon.com/sagemaker/latest/dg/batch-transform.html + This method would first create a AutoGluonSagemakerInferenceModel with the trained predictor, + then create a transformer with it, and call transform in the end. + + Parameters + ---------- + test_data: str + The test data to be inferenced. + Can be a pandas.DataFrame or a local path to a csv file. + id_column: str + Name of the 'item_id' column + timestamp_column: str + Name of the 'timestamp' column + static_features: Optional[Union[str, pd.DataFrame]] + An optional data frame describing the metadata attributes of individual items in the item index. 
+ For more detail, please refer to `TimeSeriesDataFrame` documentation: + https://auto.gluon.ai/stable/api/autogluon.predictor.html#timeseriesdataframe + target: str + Name of column that contains the target values to forecast + kwargs: + Refer to `CloudPredictor.predict()` + """ + test_data = self._preprocess_data( + data=test_data, + id_column=id_column, + timestamp_column=timestamp_column, + target=target, + static_features=static_features, + ) + pred, _ = super()._predict( + test_data=test_data, + split_pred_proba=False, + **kwargs, + ) + return pred + + def predict_proba( + self, + **kwargs, + ) -> Optional[pd.DataFrame]: + raise ValueError(f"{self.__class__.__name__} does not support predict_proba operation.") diff --git a/src/autogluon/cloud/scripts/script_manager.py b/src/autogluon/cloud/scripts/script_manager.py index 7ee9123..8162db5 100644 --- a/src/autogluon/cloud/scripts/script_manager.py +++ b/src/autogluon/cloud/scripts/script_manager.py @@ -8,18 +8,20 @@ class ScriptManager: TRAIN_SCRIPT_PATH = os.path.join(SCRIPTS_PATH, "train.py") TABULAR_SERVE_SCRIPT_PATH = os.path.join(SCRIPTS_PATH, "tabular_serve.py") MULTIMODAL_SERVE_SCRIPT_PATH = os.path.join(SCRIPTS_PATH, "multimodal_serve.py") + TIMESERIES_SERVE_SCRIPT_PATH = os.path.join(SCRIPTS_PATH, "timeseries_serve.py") _SERVE_SCRIPT_MAP = dict( tabular=TABULAR_SERVE_SCRIPT_PATH, multimodal=MULTIMODAL_SERVE_SCRIPT_PATH, + timeseries=TIMESERIES_SERVE_SCRIPT_PATH, ) @classmethod def get_train_script(cls, predictor_type, framework_version): - assert predictor_type in ["tabular", "multimodal"] - # tabular, multimodal ßshare the same training script + assert predictor_type in ["tabular", "multimodal", "timeseries"] + # tabular, multimodal, timeseries share the same training script return cls.TRAIN_SCRIPT_PATH @classmethod def get_serve_script(cls, predictor_type, framework_version): - assert predictor_type in ["tabular", "multimodal"] + assert predictor_type in ["tabular", "multimodal", "timeseries"] return 
cls._SERVE_SCRIPT_MAP[predictor_type] diff --git a/src/autogluon/cloud/scripts/timeseries_serve.py b/src/autogluon/cloud/scripts/timeseries_serve.py new file mode 100644 index 0000000..cf43635 --- /dev/null +++ b/src/autogluon/cloud/scripts/timeseries_serve.py @@ -0,0 +1,83 @@ +# flake8: noqa +import os +import shutil +from io import BytesIO, StringIO + +import pandas as pd + +from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor + + +def model_fn(model_dir): + """loads model from previously saved artifact""" + # TSPredictor will write to the model file during inference while the default model_dir is read only + # Copy the model file to a writable location as a temporary workaround + tmp_model_dir = os.path.join("/tmp", "model") + try: + shutil.copytree(model_dir, tmp_model_dir, dirs_exist_ok=False) + except: + # model already copied + pass + model = TimeSeriesPredictor.load(tmp_model_dir) + print("MODEL LOADED") + return model + + +def prepare_timeseries_dataframe(df, predictor): + target = predictor.target + cols = df.columns.to_list() + id_column = cols[0] + timestamp_column = cols[1] + df[timestamp_column] = pd.to_datetime(df[timestamp_column]) + static_features = None + if target != cols[-1]: + # target is not the last column, then there are static features being merged in + target_index = cols.index(target) + static_columns = cols[target_index + 1 :] + static_features = df[[id_column] + static_columns].groupby([id_column], sort=False).head(1) + static_features.set_index(id_column, inplace=True) + df.drop(columns=static_columns, inplace=True) + df = TimeSeriesDataFrame.from_data_frame(df, id_column=id_column, timestamp_column=timestamp_column) + if static_features is not None: + df.static_features = static_features + return df + + +def transform_fn(model, request_body, input_content_type, output_content_type="application/json"): + if input_content_type == "application/x-parquet": + buf = BytesIO(request_body) + data = pd.read_parquet(buf) + 
+ elif input_content_type == "text/csv": + buf = StringIO(request_body) + data = pd.read_csv(buf) + + elif input_content_type == "application/json": + buf = StringIO(request_body) + data = pd.read_json(buf) + + elif input_content_type == "application/jsonl": + buf = StringIO(request_body) + data = pd.read_json(buf, orient="records", lines=True) + + else: + raise ValueError(f"{input_content_type} input content type not supported.") + + data = prepare_timeseries_dataframe(data, model) + prediction = model.predict(data) + prediction = pd.DataFrame(prediction) + + if "application/x-parquet" in output_content_type: + prediction.columns = prediction.columns.astype(str) + output = prediction.to_parquet() + output_content_type = "application/x-parquet" + elif "application/json" in output_content_type: + output = prediction.to_json() + output_content_type = "application/json" + elif "text/csv" in output_content_type: + output = prediction.to_csv(index=None) + output_content_type = "text/csv" + else: + raise ValueError(f"{output_content_type} content type not supported") + + return output, output_content_type diff --git a/src/autogluon/cloud/scripts/train.py b/src/autogluon/cloud/scripts/train.py index 4915e43..4c0e9cd 100644 --- a/src/autogluon/cloud/scripts/train.py +++ b/src/autogluon/cloud/scripts/train.py @@ -4,11 +4,13 @@ # https://github.com/autogluon/autogluon/issues/2042 import argparse import os +import pandas as pd import shutil from pprint import pprint import yaml +from autogluon.common.loaders import load_pd from autogluon.tabular import TabularPredictor, TabularDataset, FeatureMetadata @@ -28,6 +30,37 @@ def get_env_if_present(name): return result +def prepare_timeseries_dataframe(df, predictor_init_args): + target = predictor_init_args["target"] + cols = df.columns.to_list() + id_column = cols[0] + timestamp_column = cols[1] + df[timestamp_column] = pd.to_datetime(df[timestamp_column]) + static_features = None + if target != cols[-1]: + # target is not the 
last column, then there are static features being merged in + target_index = cols.index(target) + static_columns = cols[target_index + 1 :] + static_features = df[[id_column] + static_columns].groupby([id_column], sort=False).head(1) + static_features.set_index(id_column, inplace=True) + df.drop(columns=static_columns, inplace=True) + df = TimeSeriesDataFrame.from_data_frame(df, id_column=id_column, timestamp_column=timestamp_column) + if static_features is not None: + print(static_features) + df.static_features = static_features + return df + + +def prepare_data(data_file, predictor_type, predictor_init_args=None): + if predictor_type == "timeseries": + assert predictor_init_args is not None + data = load_pd.load(data_file) + data = prepare_timeseries_dataframe(data, predictor_init_args) + else: + data = TabularDataset(data_file) + return data + + if __name__ == "__main__": # Disable Autotune os.environ["MXNET_CUDNN_AUTOTUNE_DEFAULT"] = "0" @@ -74,7 +107,7 @@ def get_env_if_present(name): predictor_init_args = config["predictor_init_args"] predictor_init_args["path"] = save_path predictor_fit_args = config["predictor_fit_args"] - valid_predictor_types = ["tabular", "multimodal"] + valid_predictor_types = ["tabular", "multimodal", "timeseries"] assert ( predictor_type in valid_predictor_types ), f"predictor_type {predictor_type} not supported. 
Valid options are {valid_predictor_types}" @@ -86,9 +119,14 @@ def get_env_if_present(name): from autogluon.multimodal import MultiModalPredictor predictor_cls = MultiModalPredictor + elif predictor_type == "timeseries": + from autogluon.timeseries import TimeSeriesPredictor, TimeSeriesDataFrame + + predictor_cls = TimeSeriesPredictor train_file = get_input_path(args.train_dir) - training_data = TabularDataset(train_file) + training_data = prepare_data(train_file, predictor_type, predictor_init_args) + if predictor_type == "tabular" and "image_column" in config: feature_metadata = predictor_fit_args.get("feature_metadata", None) if feature_metadata is None: @@ -99,7 +137,7 @@ def get_env_if_present(name): tuning_data = None if args.tune_dir: tune_file = get_input_path(args.tune_dir) - tuning_data = TabularDataset(tune_file) + tuning_data = prepare_data(tune_file, predictor_type) if args.train_images: train_image_compressed_file = get_input_path(args.train_images) diff --git a/tests/conftest.py b/tests/conftest.py index 27a2720..cbc5071 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -6,6 +6,8 @@ import pandas as pd import pytest +from autogluon.cloud import TimeSeriesCloudPredictor + class CloudTestHelper: cpu_training_image = "369469875935.dkr.ecr.us-east-1.amazonaws.com/autogluon-nightly-training:cpu-latest" @@ -62,10 +64,14 @@ def replace_image_abspath(data, image_column): @staticmethod def test_endpoint(cloud_predictor, test_data, **predict_real_time_kwargs): try: - pred = cloud_predictor.predict_real_time(test_data, **predict_real_time_kwargs) - assert isinstance(pred, pd.Series) - pred_proba = cloud_predictor.predict_proba_real_time(test_data, **predict_real_time_kwargs) - assert isinstance(pred_proba, pd.DataFrame) + if isinstance(cloud_predictor, TimeSeriesCloudPredictor): + pred = cloud_predictor.predict_real_time(test_data, **predict_real_time_kwargs) + assert isinstance(pred, pd.DataFrame) + else: + pred = 
cloud_predictor.predict_real_time(test_data, **predict_real_time_kwargs) + assert isinstance(pred, pd.Series) + pred_proba = cloud_predictor.predict_proba_real_time(test_data, **predict_real_time_kwargs) + assert isinstance(pred_proba, pd.DataFrame) except Exception as e: cloud_predictor.cleanup_deployment() # cleanup endpoint if test failed raise e @@ -110,8 +116,12 @@ def test_basic_functionality( if predict_kwargs is None: predict_kwargs = dict() - pred, pred_proba = cloud_predictor.predict_proba(test_data, **predict_kwargs) - assert isinstance(pred, pd.Series) and isinstance(pred_proba, pd.DataFrame) + if isinstance(cloud_predictor, TimeSeriesCloudPredictor): + pred = cloud_predictor.predict(test_data, **predict_kwargs) + assert isinstance(pred, pd.DataFrame) + else: + pred, pred_proba = cloud_predictor.predict_proba(test_data, **predict_kwargs) + assert isinstance(pred, pd.Series) and isinstance(pred_proba, pd.DataFrame) info = cloud_predictor.info() assert info["recent_transform_job"]["status"] == "Completed" diff --git a/tests/test_check_style.py b/tests/test_check_style.py deleted file mode 100644 index 0ad5614..0000000 --- a/tests/test_check_style.py +++ /dev/null @@ -1,15 +0,0 @@ -import logging -import warnings -from subprocess import PIPE, Popen - - -def test_check_style(): - logging.getLogger().setLevel(logging.INFO) - logging.info("PEP8 Style check") - flake8_proc = Popen(["flake8", "--count", "--max-line-length", "300"], stdout=PIPE) - flake8_out = flake8_proc.communicate()[0] - lines = flake8_out.splitlines() - count = int(lines[-1].decode()) - if count > 0: - warnings.warn(f"{count} PEP8 warnings remaining") - assert count < 1000, "Too many PEP8 warnings found, improve code quality to pass test." 
diff --git a/tests/unittests/general/test_full_functionality.py b/tests/unittests/general/test_full_functionality.py index 7362664..bebc13c 100644 --- a/tests/unittests/general/test_full_functionality.py +++ b/tests/unittests/general/test_full_functionality.py @@ -25,11 +25,11 @@ def test_full_functionality(test_helper, framework_version): time_limit=time_limit, ) cloud_predictor = TabularCloudPredictor( - cloud_output_path=f"s3://autogluon-cloud-ci/test-tabular/{timestamp}", + cloud_output_path=f"s3://autogluon-cloud-ci/test-tabular/{framework_version}/{timestamp}", local_output_path="test_tabular_cloud_predictor", ) cloud_predictor_no_train = TabularCloudPredictor( - cloud_output_path=f"s3://autogluon-cloud-ci/test-tabular-no-train/{timestamp}", + cloud_output_path=f"s3://autogluon-cloud-ci/test-tabular-no-train/{framework_version}/{timestamp}", local_output_path="test_tabular_cloud_predictor_no_train", ) training_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="training", gpu=False) diff --git a/tests/unittests/image/test_image.py b/tests/unittests/image/test_image.py index d1cba0f..aa2b16b 100644 --- a/tests/unittests/image/test_image.py +++ b/tests/unittests/image/test_image.py @@ -21,7 +21,7 @@ def test_multimodal_image_only(test_helper, framework_version="source"): predictor_init_args = dict(label="label", eval_metric="acc") predictor_fit_args = dict(train_data=train_data, time_limit=time_limit) cloud_predictor = MultiModalCloudPredictor( - cloud_output_path=f"s3://autogluon-cloud-ci/test-multimodal-image/{timestamp}", + cloud_output_path=f"s3://autogluon-cloud-ci/test-multimodal-image/{framework_version}/{timestamp}", local_output_path="test_multimodal_image_cloud_predictor", ) training_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="training", gpu=True) diff --git a/tests/unittests/multimodal/test_multimodal.py b/tests/unittests/multimodal/test_multimodal.py index ce78025..86bf82e 100644 --- 
a/tests/unittests/multimodal/test_multimodal.py +++ b/tests/unittests/multimodal/test_multimodal.py @@ -23,7 +23,7 @@ def test_multimodal_tabular_text_image(test_helper, framework_version): ) predictor_fit_args = dict(train_data=train_data, time_limit=time_limit) cloud_predictor = MultiModalCloudPredictor( - cloud_output_path=f"s3://autogluon-cloud-ci/test-multimodal-tabular-text-image/{timestamp}", + cloud_output_path=f"s3://autogluon-cloud-ci/test-multimodal-tabular-text-image/{framework_version}/{timestamp}", local_output_path="test_multimodal_tabular_text_image_cloud_predictor", ) training_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="training", gpu=True) diff --git a/tests/unittests/tabular/test_tabular.py b/tests/unittests/tabular/test_tabular.py index 749df45..19ff5bd 100644 --- a/tests/unittests/tabular/test_tabular.py +++ b/tests/unittests/tabular/test_tabular.py @@ -33,7 +33,7 @@ def test_tabular_tabular_text_image(test_helper, framework_version): }, ) cloud_predictor = TabularCloudPredictor( - cloud_output_path=f"s3://autogluon-cloud-ci/test-tabular-tabular-text-image/{timestamp}", + cloud_output_path=f"s3://autogluon-cloud-ci/test-tabular-tabular-text-image/{framework_version}/{timestamp}", local_output_path="test_tabular_tabular_text_image_cloud_predictor", ) training_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="training", gpu=True) diff --git a/tests/unittests/text/test_text.py b/tests/unittests/text/test_text.py index 4522888..8afb2be 100644 --- a/tests/unittests/text/test_text.py +++ b/tests/unittests/text/test_text.py @@ -17,7 +17,7 @@ def test_multimodal_text_only(test_helper, framework_version): predictor_init_args = dict(label="label", eval_metric="acc") predictor_fit_args = dict(train_data=train_data, tuning_data=tune_data, time_limit=time_limit) cloud_predictor = MultiModalCloudPredictor( - cloud_output_path=f"s3://autogluon-cloud-ci/test-multimodal-text/{timestamp}", + 
cloud_output_path=f"s3://autogluon-cloud-ci/test-multimodal-text/{framework_version}/{timestamp}", local_output_path="test_multimodal_text_cloud_predictor", ) training_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="training", gpu=True) diff --git a/tests/unittests/timeseries/test_timeseries.py b/tests/unittests/timeseries/test_timeseries.py index 3c1f51b..d9ecc58 100644 --- a/tests/unittests/timeseries/test_timeseries.py +++ b/tests/unittests/timeseries/test_timeseries.py @@ -1,2 +1,59 @@ -def test_timeseries(): - pass +import os +import tempfile + +from autogluon.cloud import TimeSeriesCloudPredictor + + +def test_timeseries(test_helper, framework_version): + train_data = "timeseries_train.csv" + static_features = "timeseries_static_features.csv" + id_column = "item_id" + timestamp_column = "timestamp" + target = "target" + timestamp = test_helper.get_utc_timestamp_now() + with tempfile.TemporaryDirectory() as temp_dir: + os.chdir(temp_dir) + test_helper.prepare_data(train_data, static_features) + time_limit = 60 + + predictor_init_args = dict(target=target) + + predictor_fit_args = dict( + train_data=train_data, + presets="medium_quality", + time_limit=time_limit, + ) + cloud_predictor = TimeSeriesCloudPredictor( + cloud_output_path=f"s3://autogluon-cloud-ci/test-timeseries/{framework_version}/{timestamp}", + local_output_path="test_timeseries_cloud_predictor", + ) + training_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="training", gpu=False) + inference_custom_image_uri = test_helper.get_custom_image_uri(framework_version, type="inference", gpu=False) + test_helper.test_basic_functionality( + cloud_predictor, + predictor_init_args, + predictor_fit_args, + train_data, + fit_kwargs=dict( + id_column=id_column, + timestamp_column=timestamp_column, + static_features=static_features, + framework_version=framework_version, + custom_image_uri=training_custom_image_uri, + ), + 
deploy_kwargs=dict(framework_version=framework_version, custom_image_uri=inference_custom_image_uri), + predict_kwargs=dict( + id_column=id_column, + timestamp_column=timestamp_column, + target=target, + static_features=static_features, + framework_version=framework_version, + custom_image_uri=inference_custom_image_uri, + ), + predict_real_time_kwargs=dict( + id_column=id_column, + timestamp_column=timestamp_column, + target=target, + static_features=static_features, + ), + )