diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/README.md b/packages/google-cloud-retail/samples/interactive-tutorials/README.md index ca7204f71298..70fb2f3cd806 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/README.md +++ b/packages/google-cloud-retail/samples/interactive-tutorials/README.md @@ -124,7 +124,7 @@ The bucket name must be unique. For convenience, you can name it ` str: + return prefixer.create_prefix() diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_big_query_table.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_big_query_table.py index 39b0d6f6b03c..d66fce9e3d6a 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_big_query_table.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_big_query_table.py @@ -12,90 +12,104 @@ # See the License for the specific language governing permissions and # limitations under the License. -# [START retail_import_products_from_big_query] -# Import products into a catalog from big query table using Retail API -# +import argparse import os -import time - -from google.cloud.retail import ( - BigQuerySource, - ImportProductsRequest, - ProductInputConfig, - ProductServiceClient, -) -project_number = os.environ["GOOGLE_CLOUD_PROJECT_NUMBER"] project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -default_catalog = f"projects/{project_number}/locations/global/catalogs/default_catalog/branches/default_branch" -dataset_id = "products" -table_id = "products" +def main(project_id, dataset_id, table_id): + # [START retail_import_products_from_big_query] + # TODO: Set project_id to your Google Cloud Platform project ID. 
+ # project_id = "my-project" -# TO CHECK ERROR HANDLING USE THE TABLE WITH INVALID PRODUCTS: -# table_id = "products_some_invalid" + # TODO: Set dataset_id + # dataset_id = "products" + # TODO: Set table_id + # table_id = "products" -# get import products from big query request -def get_import_products_big_query_request(reconciliation_mode): - # TO CHECK ERROR HANDLING PASTE THE INVALID CATALOG NAME HERE: - # default_catalog = "invalid_catalog_name" - big_query_source = BigQuerySource() - big_query_source.project_id = project_id - big_query_source.dataset_id = dataset_id - big_query_source.table_id = table_id - big_query_source.data_schema = "product" + # Import products into a catalog from big query table using Retail API + import time - input_config = ProductInputConfig() - input_config.big_query_source = big_query_source + from google.cloud.retail import ( + BigQuerySource, + ImportProductsRequest, + ProductInputConfig, + ProductServiceClient, + ) - import_request = ImportProductsRequest() - import_request.parent = default_catalog - import_request.reconciliation_mode = reconciliation_mode - import_request.input_config = input_config + default_catalog = f"projects/{project_id}/locations/global/catalogs/default_catalog/branches/default_branch" - print("---import products from big query table request---") - print(import_request) + # TO CHECK ERROR HANDLING USE THE TABLE WITH INVALID PRODUCTS: + # table_id = "products_some_invalid" - return import_request + # get import products from big query request + def get_import_products_big_query_request(reconciliation_mode): + # TO CHECK ERROR HANDLING PASTE THE INVALID CATALOG NAME HERE: + # default_catalog = "invalid_catalog_name" + big_query_source = BigQuerySource() + big_query_source.project_id = project_id + big_query_source.dataset_id = dataset_id + big_query_source.table_id = table_id + big_query_source.data_schema = "product" + input_config = ProductInputConfig() + input_config.big_query_source = big_query_source 
-# call the Retail API to import products -def import_products_from_big_query(): - # TRY THE FULL RECONCILIATION MODE HERE: - reconciliation_mode = ImportProductsRequest.ReconciliationMode.INCREMENTAL + import_request = ImportProductsRequest() + import_request.parent = default_catalog + import_request.reconciliation_mode = reconciliation_mode + import_request.input_config = input_config - import_big_query_request = get_import_products_big_query_request( - reconciliation_mode - ) - big_query_operation = ProductServiceClient().import_products( - import_big_query_request - ) + print("---import products from big query table request---") + print(import_request) - print("---the operation was started:----") - print(big_query_operation.operation.name) + return import_request - while not big_query_operation.done(): - print("---please wait till operation is done---") - time.sleep(30) - print("---import products operation is done---") + # call the Retail API to import products + def import_products_from_big_query(): + # TRY THE FULL RECONCILIATION MODE HERE: + reconciliation_mode = ImportProductsRequest.ReconciliationMode.INCREMENTAL - if big_query_operation.metadata is not None: - print("---number of successfully imported products---") - print(big_query_operation.metadata.success_count) - print("---number of failures during the importing---") - print(big_query_operation.metadata.failure_count) - else: - print("---operation.metadata is empty---") + import_big_query_request = get_import_products_big_query_request( + reconciliation_mode + ) + big_query_operation = ProductServiceClient().import_products( + import_big_query_request + ) - if big_query_operation.result is not None: - print("---operation result:---") - print(big_query_operation.result()) - else: - print("---operation.result is empty---") + print("---the operation was started:----") + print(big_query_operation.operation.name) + while not big_query_operation.done(): + print("---please wait till operation is done---") 
+ time.sleep(30) + print("---import products operation is done---") + + if big_query_operation.metadata is not None: + print("---number of successfully imported products---") + print(big_query_operation.metadata.success_count) + print("---number of failures during the importing---") + print(big_query_operation.metadata.failure_count) + else: + print("---operation.metadata is empty---") + + if big_query_operation.result is not None: + print("---operation result:---") + print(big_query_operation.result()) + else: + print("---operation.result is empty---") + + import_products_from_big_query() -import_products_from_big_query() # [END retail_import_products_from_big_query] + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("dataset_id") + parser.add_argument("table_id") + args = parser.parse_args() + main(project_id, args.dataset_id, args.table_id) diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_bq_test.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_bq_test.py index b743ae722ed3..027ca53cc98e 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_bq_test.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_bq_test.py @@ -15,12 +15,35 @@ import re import subprocess +from setup_product.setup_cleanup import ( + create_bq_dataset, + create_bq_table, + delete_bq_table, + upload_data_to_bq_table, +) + + +def test_import_products_bq(table_id_prefix): + dataset = "products" + valid_products_table = f"{table_id_prefix}products" + product_schema = "../resources/product_schema.json" + valid_products_source_file = "../resources/products.json" + + create_bq_dataset(dataset) + create_bq_table(dataset, valid_products_table, product_schema) + upload_data_to_bq_table( + dataset, valid_products_table, valid_products_source_file, product_schema + ) -def test_import_products_bq(): 
output = str( - subprocess.check_output("python import_products_big_query_table.py", shell=True) + subprocess.check_output( + f"python import_products_big_query_table.py {dataset} {valid_products_table}", + shell=True, + ) ) + delete_bq_table(dataset, valid_products_table) + assert re.match(".*import products from big query table request.*", output) assert re.match(".*the operation was started.*", output) assert re.match( @@ -28,4 +51,5 @@ def test_import_products_bq(): output, ) - assert re.match(".*number of successfully imported products.*316.*", output) + assert re.match(".*number of successfully imported products.*?316.*", output) + assert re.match(".*number of failures during the importing.*?0.*", output) diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs.py index 1a8656042d09..1ca5f3e8d9da 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs.py @@ -28,12 +28,11 @@ # Read the project number from the environment variable -project_number = os.environ["GOOGLE_CLOUD_PROJECT_NUMBER"] project_id = os.environ["GOOGLE_CLOUD_PROJECT"] bucket_name = os.environ["BUCKET_NAME"] # You can change the branch here. 
The "default_branch" is set to point to the branch "0" -default_catalog = f"projects/{project_number}/locations/global/catalogs/default_catalog/branches/default_branch" +default_catalog = f"projects/{project_id}/locations/global/catalogs/default_catalog/branches/default_branch" gcs_bucket = f"gs://{bucket_name}" gcs_errors_bucket = f"{gcs_bucket}/error" diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs_test.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs_test.py index f8ec41496ec6..706112fdd4bb 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs_test.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_gcs_test.py @@ -12,13 +12,22 @@ # See the License for the specific language governing permissions and # limitations under the License. +import os import re import subprocess +from setup_product.setup_cleanup import create_bucket, delete_bucket, upload_blob + def test_import_products_gcs(): + bucket_name = os.environ["BUCKET_NAME"] + create_bucket(bucket_name) + upload_blob(bucket_name, "../resources/products.json") + output = str(subprocess.check_output("python import_products_gcs.py", shell=True)) + delete_bucket(bucket_name) + assert re.match(".*import products from google cloud source request.*", output) assert re.match('.*input_uris: "gs://.*/products.json".*', output) assert re.match(".*the operation was started.*", output) @@ -27,4 +36,5 @@ def test_import_products_gcs(): output, ) - assert re.match(".*number of successfully imported products.*316.*", output) + assert re.match(".*number of successfully imported products.*?316.*", output) + assert re.match(".*number of failures during the importing.*?0.*", output) diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_source.py 
b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_source.py index 23e899302b1f..5d6434d9f56e 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_source.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_source.py @@ -33,9 +33,9 @@ from google.protobuf.field_mask_pb2 import FieldMask # Read the project number from the environment variable -project_number = os.environ["GOOGLE_CLOUD_PROJECT_NUMBER"] +project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -default_catalog = f"projects/{project_number}/locations/global/catalogs/default_catalog/branches/default_branch" +default_catalog = f"projects/{project_id}/locations/global/catalogs/default_catalog/branches/default_branch" # prepare product to import as inline source diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_test.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_test.py index 388cd5b603b9..e8f26d174e04 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_test.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/import_products_inline_test.py @@ -28,4 +28,5 @@ def test_import_products_gcs(): output, ) - assert re.match(".*number of successfully imported products.*2.*", output) + assert re.match(".*number of successfully imported products.*?2.*", output) + assert re.match(".*number of failures during the importing.*?0.*", output) diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/requirements-test.txt b/packages/google-cloud-retail/samples/interactive-tutorials/product/requirements-test.txt index bbf73145f7ee..6113315eddf7 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/requirements-test.txt +++ 
b/packages/google-cloud-retail/samples/interactive-tutorials/product/requirements-test.txt @@ -1,2 +1,3 @@ pytest==6.2.5 pytest-xdist==2.5.0 +google-cloud-testutils==1.3.1 diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_bigquery_table_test.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_bigquery_table_test.py deleted file mode 100644 index e6eee35350b3..000000000000 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_bigquery_table_test.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright 2021 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import re -import subprocess - -project_id = os.environ["GOOGLE_CLOUD_PROJECT"] - - -def test_create_bigquery_table(): - output = str( - subprocess.check_output( - 'python setup/products_create_bigquery_table.py', - shell=True)) - assert re.match( - f'.*Creating dataset {project_id}.products.*', output) - assert re.match( - f'(.*dataset {project_id}.products already exists.*|.*dataset is created.*)', output) - assert re.match( - f'.*Creating BigQuery table {project_id}.products.products.*', output) - assert re.match( - f'(.*table {project_id}.products.products already exists.*|.*table is created.*)', output) - assert re.match( - f'.*Uploading data from ../resources/products.json to the table {project_id}.products.products.*', output) - assert re.match( - f'.*Creating BigQuery table {project_id}.products.products_some_invalid.*', - output) - assert re.match( - f'(.*table {project_id}.products.products_some_invalid already exists.*|.*table is created.*)', - output) - assert re.match( - f'.*Uploading data from ../resources/products_some_invalid.json to the table {project_id}.products.products_some_invalid.*', - output) diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_gcs_bucket_test.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_gcs_bucket_test.py deleted file mode 100644 index 2050974518ae..000000000000 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_gcs_bucket_test.py +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright 2021 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import re -import subprocess - -from products_delete_gcs_bucket import delete_bucket_by_name - - -def test_create_gcs_bucket(): - output = str( - subprocess.check_output( - 'python setup/products_create_gcs_bucket.py', - shell=True)) - - bucket_name = re.search('The gcs bucket (.+?) was created', output).group(1) - delete_bucket_by_name(bucket_name) - - print("bucket_name = {}".format(bucket_name)) - - assert re.match( - '.*Creating new bucket.*', output) - assert re.match( - '(.*The gcs bucket.*?was created.*|.*Bucket.*?already exists.*)', output) - assert re.match( - '.*Uploading data from ../resources/products.json to the bucket.*', output) - assert re.match( - '.*Uploading data from ../resources/products_some_invalid.json to the bucket.*', - output) diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_bigquery_table.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/products_create_bigquery_table.py similarity index 100% rename from packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_bigquery_table.py rename to packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/products_create_bigquery_table.py diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_gcs_bucket.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/products_create_gcs_bucket.py similarity index 100% rename from 
packages/google-cloud-retail/samples/interactive-tutorials/product/setup/products_create_gcs_bucket.py rename to packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/products_create_gcs_bucket.py diff --git a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/setup_cleanup.py b/packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/setup_cleanup.py similarity index 89% rename from packages/google-cloud-retail/samples/interactive-tutorials/product/setup/setup_cleanup.py rename to packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/setup_cleanup.py index 11027b9cc6ca..5c04f5e31e41 100644 --- a/packages/google-cloud-retail/samples/interactive-tutorials/product/setup/setup_cleanup.py +++ b/packages/google-cloud-retail/samples/interactive-tutorials/product/setup_product/setup_cleanup.py @@ -23,10 +23,9 @@ from google.cloud.retail_v2 import CreateProductRequest, DeleteProductRequest, \ FulfillmentInfo, GetProductRequest, PriceInfo, Product, ProductServiceClient -project_number = os.environ["GOOGLE_CLOUD_PROJECT_NUMBER"] project_id = os.environ["GOOGLE_CLOUD_PROJECT"] -default_catalog = f"projects/{project_number}/locations/global/catalogs/default_catalog" -default_branch_name = f"projects/{project_number}/locations/global/catalogs/default_catalog/branches/default_branch" +default_catalog = f"projects/{project_id}/locations/global/catalogs/default_catalog" +default_branch_name = f"projects/{project_id}/locations/global/catalogs/default_catalog/branches/default_branch" def generate_product() -> Product: @@ -100,7 +99,7 @@ def try_to_delete_product_if_exists(product_name: str): def create_bucket(bucket_name: str): """Create a new bucket in Cloud Storage""" print("Creating new bucket:" + bucket_name) - buckets_in_your_project = str(list_buckets()) + buckets_in_your_project = list_buckets() if bucket_name in buckets_in_your_project: print("Bucket {} already 
exists".format(bucket_name)) else: @@ -118,9 +117,9 @@ def create_bucket(bucket_name: str): def delete_bucket(bucket_name: str): """Delete a bucket from Cloud Storage""" + print(f"Deleting bucket name: {bucket_name}") storage_client = storage.Client() - print("Deleting bucket name:" + bucket_name) - buckets_in_your_project = str(list_buckets()) + buckets_in_your_project = list_buckets() if bucket_name in buckets_in_your_project: blobs = storage_client.list_blobs(bucket_name) for blob in blobs: @@ -138,7 +137,7 @@ def list_buckets(): storage_client = storage.Client() buckets = storage_client.list_buckets() for bucket in buckets: - bucket_list.append(str(bucket)) + bucket_list.append(bucket.name) return bucket_list @@ -194,6 +193,13 @@ def create_bq_table(dataset, table_name, schema_file_path): print("table is created") +def delete_bq_table(dataset, table_name): + full_table_id = f"{project_id}.{dataset}.{table_name}" + bq = bigquery.Client() + bq.delete_table(full_table_id, not_found_ok=True) + print("Table '{}' is deleted.".format(full_table_id)) + + def upload_data_to_bq_table(dataset, table_name, source, schema_file_path): """Upload data to the table from specified source file""" full_table_id = f"{project_id}.{dataset}.{table_name}" @@ -201,8 +207,11 @@ def upload_data_to_bq_table(dataset, table_name, source, schema_file_path): print(f"Uploading data from {source} to the table {full_table_id}") with open(schema_file_path, "rb") as schema: schema_dict = json.load(schema) - job_config = bigquery.LoadJobConfig(source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, schema=schema_dict) + job_config = bigquery.LoadJobConfig( + source_format=bigquery.SourceFormat.NEWLINE_DELIMITED_JSON, + schema=schema_dict) with open(source, "rb") as source_file: - job = bq.load_table_from_file(source_file, full_table_id, job_config=job_config) + job = bq.load_table_from_file(source_file, full_table_id, + job_config=job_config) job.result() # Waits for the job to complete. 
print("data was uploaded")