From 808a9b10bad12900f1e012e4ad4c3d918799d849 Mon Sep 17 00:00:00 2001 From: Sergei Dorogin Date: Tue, 23 Jun 2020 17:10:37 +0300 Subject: [PATCH 1/4] Update automl_tables_predict.py with batch_predict_bq sample Added a new method `batch_predict_bq` demonstrating running batch_prediction using BigQuery. Added notes in comments about asynchronicity for `batch_predict` method. --- tables/automl/automl_tables_predict.py | 48 ++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tables/automl/automl_tables_predict.py b/tables/automl/automl_tables_predict.py index 4a3423e3d537..90f6be12adf2 100644 --- a/tables/automl/automl_tables_predict.py +++ b/tables/automl/automl_tables_predict.py @@ -80,6 +80,46 @@ def predict( # [END automl_tables_predict] +def batch_predict_bq( + project_id, + compute_region, + model_display_name, + bq_input_uri, + bq_output_uri, +): + """Make a batch of predictions.""" + # [START automl_tables_batch_predict_bq] + # TODO(developer): Uncomment and set the following variables + # project_id = 'PROJECT_ID_HERE' + # compute_region = 'COMPUTE_REGION_HERE' + # model_display_name = 'MODEL_DISPLAY_NAME_HERE' + # bq_input_uri = 'bq://my-project.my-dataset.my-table' + # bq_output_uri = 'bq://my-project' + + from google.cloud import automl_v1beta1 as automl + + client = automl.TablesClient(project=project_id, region=compute_region) + + # Query model + response = client.batch_predict(bigquery_input_uri=bq_input_uri, + bigquery_output_uri=bq_output_uri, + model_display_name=model_display_name) + print("Making batch prediction... ") + # `response` is a async operation descriptor, + # you can register a callback for the operation to compelete via `add_done_callback`: + # def callback(operation_future): + # result = operation_future.result() + # response.add_done_callback(callback) + # + # or block the thread polling for the operation's results: + response.result() + # AutoML puts predictions in a newly generated dataset with a name by a mask "prediction_" + model_id + "_" + timestamp + # here's how to get the dataset name: + dataset_name = job.metadata.batch_predict_details.output_info.bigquery_output_dataset + + print("Batch prediction complete.\n{}".format(response.metadata)) + + # [END automl_tables_batch_predict_bq] def batch_predict( project_id, @@ -106,7 +146,15 @@ def batch_predict( gcs_output_uri_prefix=gcs_output_uri, model_display_name=model_display_name) print("Making batch prediction... ") + # `response` is a async operation descriptor, + # you can register a callback for the operation to compelete via `add_done_callback`: + # def callback(operation_future): + # result = operation_future.result() + # response.add_done_callback(callback) + # + # or block the thread polling for the operation's results: response.result() + print("Batch prediction complete.\n{}".format(response.metadata)) # [END automl_tables_batch_predict] From 37bd5bf755d87faf062aa645a2fdd7fa56677c93 Mon Sep 17 00:00:00 2001 From: Sergei Dorogin Date: Thu, 25 Jun 2020 15:00:52 +0300 Subject: [PATCH 2/4] fix: nox lint passing --- tables/automl/automl_tables_predict.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tables/automl/automl_tables_predict.py b/tables/automl/automl_tables_predict.py index 90f6be12adf2..0bce794451a7 100644 --- a/tables/automl/automl_tables_predict.py +++ b/tables/automl/automl_tables_predict.py @@ -80,6 +80,7 @@ def predict( # [END automl_tables_predict] + def batch_predict_bq( project_id, compute_region, @@ -105,8 +106,8 @@ def batch_predict_bq( bigquery_output_uri=bq_output_uri, model_display_name=model_display_name) print("Making batch prediction... ") - # `response` is a async operation descriptor, - # you can register a callback for the operation to compelete via `add_done_callback`: + # `response` is a async operation descriptor, + # you can register a callback for the operation to complete via `add_done_callback`: # def callback(operation_future): # result = operation_future.result() # response.add_done_callback(callback) @@ -115,12 +116,14 @@ def batch_predict_bq( response.result() # AutoML puts predictions in a newly generated dataset with a name by a mask "prediction_" + model_id + "_" + timestamp # here's how to get the dataset name: - dataset_name = job.metadata.batch_predict_details.output_info.bigquery_output_dataset - - print("Batch prediction complete.\n{}".format(response.metadata)) + dataset_name = response.metadata.batch_predict_details.output_info.bigquery_output_dataset + + print("Batch prediction complete.\nResults are in '{}' dataset.\n{}".format( + dataset_name, response.metadata)) # [END automl_tables_batch_predict_bq] + def batch_predict( project_id, compute_region, @@ -146,15 +149,15 @@ def batch_predict( gcs_output_uri_prefix=gcs_output_uri, model_display_name=model_display_name) print("Making batch prediction... ") - # `response` is a async operation descriptor, - # you can register a callback for the operation to compelete via `add_done_callback`: + # `response` is a async operation descriptor, + # you can register a callback for the operation to complete via `add_done_callback`: # def callback(operation_future): # result = operation_future.result() # response.add_done_callback(callback) # # or block the thread polling for the operation's results: response.result() - + print("Batch prediction complete.\n{}".format(response.metadata)) # [END automl_tables_batch_predict] From c214b065dda55f707d6a2d19ac488eda47ef113d Mon Sep 17 00:00:00 2001 From: sirtorry Date: Thu, 16 Jul 2020 21:48:51 -0700 Subject: [PATCH 3/4] add test --- tables/automl/batch_predict_test.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tables/automl/batch_predict_test.py b/tables/automl/batch_predict_test.py index 37b5f0e09c34..368baaa47891 100644 --- a/tables/automl/batch_predict_test.py +++ b/tables/automl/batch_predict_test.py @@ -30,7 +30,8 @@ STATIC_MODEL = model_test.STATIC_MODEL GCS_INPUT = "gs://{}-automl-tables-test/bank-marketing.csv".format(PROJECT) GCS_OUTPUT = "gs://{}-automl-tables-test/TABLE_TEST_OUTPUT/".format(PROJECT) - +BQ_INPUT = "bq://{}.automl_test.bank_marketing".format(PROJECT) +BQ_OUTPUT = "bq://{}".format(PROJECT) @pytest.mark.slow def test_batch_predict(capsys): @@ -41,6 +42,14 @@ def test_batch_predict(capsys): out, _ = capsys.readouterr() assert "Batch prediction complete" in out +@pytest.mark.slow +def test_batch_predict_bq(capsys): + ensure_model_online() + automl_tables_predict.batch_predict_bq( + PROJECT, REGION, STATIC_MODEL, BQ_INPUT, BQ_OUTPUT + ) + out, _ = capsys.readouterr() + assert "Batch prediction complete" in out def ensure_model_online(): model = model_test.ensure_model_ready() From b7c088fb91b09dbc5dbb1b4e9fe237ddac35e87a Mon Sep 17 00:00:00 2001 From: sirtorry Date: Fri, 17 Jul 2020 08:34:06 -0700 Subject: [PATCH 4/4] appease the linter --- tables/automl/batch_predict_test.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tables/automl/batch_predict_test.py b/tables/automl/batch_predict_test.py index 368baaa47891..f77404deefd2 100644 --- a/tables/automl/batch_predict_test.py +++ b/tables/automl/batch_predict_test.py @@ -33,6 +33,7 @@ BQ_INPUT = "bq://{}.automl_test.bank_marketing".format(PROJECT) BQ_OUTPUT = "bq://{}".format(PROJECT) + @pytest.mark.slow def test_batch_predict(capsys): ensure_model_online() @@ -42,6 +43,7 @@ def test_batch_predict(capsys): out, _ = capsys.readouterr() assert "Batch prediction complete" in out + @pytest.mark.slow def test_batch_predict_bq(capsys): ensure_model_online() @@ -51,6 +53,7 @@ def test_batch_predict_bq(capsys): out, _ = capsys.readouterr() assert "Batch prediction complete" in out + def ensure_model_online(): model = model_test.ensure_model_ready() if model.deployment_state != enums.Model.DeploymentState.DEPLOYED: