From 0f0d9a783c12d5ebc94e72506f1ac9af03f97e41 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Wed, 13 Jun 2018 09:08:50 -0700 Subject: [PATCH 1/2] Move ocr pdf/tiff samples to GA --- vision/cloud-client/detect/README.rst | 6 +- vision/cloud-client/detect/detect.py | 85 ++++++++++++++++++++- vision/cloud-client/detect/detect_test.py | 22 ++++++ vision/cloud-client/detect/requirements.txt | 2 +- 4 files changed, 111 insertions(+), 4 deletions(-) diff --git a/vision/cloud-client/detect/README.rst b/vision/cloud-client/detect/README.rst index c0eb13fbabc6..b63421f8472b 100644 --- a/vision/cloud-client/detect/README.rst +++ b/vision/cloud-client/detect/README.rst @@ -81,7 +81,7 @@ To run this sample: $ python detect.py usage: detect.py [-h] - {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri} + {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri} ... This application demonstrates how to perform basic operations with the @@ -94,12 +94,13 @@ To run this sample: python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg + python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/ For more information, the documentation at https://cloud.google.com/vision/docs. positional arguments: - {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri} + {faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri} faces Detects faces in an image. faces-uri Detects faces in the file located in Google Cloud Storage or the web. @@ -135,6 +136,7 @@ To run this sample: document Detects document features in an image. document-uri Detects document features in the file located in Google Cloud Storage. + ocr-uri OCR with PDF/TIFF as source files on GCS optional arguments: -h, --help show this help message and exit diff --git a/vision/cloud-client/detect/detect.py b/vision/cloud-client/detect/detect.py index 074f4ccc083c..fd491e908638 100644 --- a/vision/cloud-client/detect/detect.py +++ b/vision/cloud-client/detect/detect.py @@ -24,6 +24,8 @@ python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg +python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \ + gs://BUCKET_NAME/PREFIX/ For more information, the documentation at https://cloud.google.com/vision/docs. @@ -31,8 +33,11 @@ import argparse import io +import re +from google.cloud import storage from google.cloud import vision +from google.protobuf import json_format # [START def_detect_faces] @@ -636,6 +641,77 @@ def detect_document_uri(uri): # [END def_detect_document_uri] +# [START vision_async_detect_document_ocr] +def async_detect_document(gcs_source_uri, gcs_destination_uri): + """OCR with PDF/TIFF as source files on GCS""" + # Supported mime_types are: 'application/pdf' and 'image/tiff' + mime_type = 'application/pdf' + + # How many pages should be grouped into each json output file. + # With a file of 5 pages + batch_size = 2 + + client = vision.ImageAnnotatorClient() + + feature = vision.types.Feature( + type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION) + + gcs_source = vision.types.GcsSource(uri=gcs_source_uri) + input_config = vision.types.InputConfig( + gcs_source=gcs_source, mime_type=mime_type) + + gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri) + output_config = vision.types.OutputConfig( + gcs_destination=gcs_destination, batch_size=batch_size) + + async_request = vision.types.AsyncAnnotateFileRequest( + features=[feature], input_config=input_config, + output_config=output_config) + + operation = client.async_batch_annotate_files( + requests=[async_request]) + + print('Waiting for the operation to finish.') + operation.result(timeout=180) + + # Once the request has completed and the output has been + # written to GCS, we can list all the output files. + storage_client = storage.Client() + + match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri) + bucket_name = match.group(1) + prefix = match.group(2) + + bucket = storage_client.get_bucket(bucket_name=bucket_name) + + # List objects with the given prefix. + blob_list = list(bucket.list_blobs(prefix=prefix)) + print('Output files:') + for blob in blob_list: + print(blob.name) + + # Process the first output file from GCS. + # Since we specified batch_size=2, the first response contains + # the first two pages of the input file. + output = blob_list[0] + + json_string = output.download_as_string() + response = json_format.Parse( + json_string, vision.types.AnnotateFileResponse()) + + # The actual response for the first page of the input file. + first_page_response = response.responses[0] + annotation = first_page_response.full_text_annotation + + # Here we print the full text from the first page. + # The response contains more information: + # annotation/pages/blocks/paragraphs/words/symbols + # including confidence scores and bounding boxes + print(u'Full text:\n{}'.format( + annotation.text)) +# [END vision_async_detect_document_ocr] + + def run_local(args): if args.command == 'faces': detect_faces(args.path) @@ -684,6 +760,8 @@ def run_uri(args): detect_document_uri(args.uri) elif args.command == 'web-geo-uri': web_entities_include_geo_results_uri(args.uri) + elif args.command == 'ocr-uri': + async_detect_document(args.uri, args.destination_uri) if __name__ == '__main__': @@ -785,9 +863,14 @@ def run_uri(args): 'document-uri', help=detect_document_uri.__doc__) document_uri_parser.add_argument('uri') + ocr_uri_parser = subparsers.add_parser( + 'ocr-uri', help=async_detect_document.__doc__) + ocr_uri_parser.add_argument('uri') + ocr_uri_parser.add_argument('destination_uri') + args = parser.parse_args() - if ('uri' in args.command): + if 'uri' in args.command: run_uri(args) else: run_local(args) diff --git a/vision/cloud-client/detect/detect_test.py b/vision/cloud-client/detect/detect_test.py index 0510d1006d5c..f298860b0fd8 100644 --- a/vision/cloud-client/detect/detect_test.py +++ b/vision/cloud-client/detect/detect_test.py @@ -14,9 +14,14 @@ import os +from google.cloud import storage + import detect BUCKET = os.environ['CLOUD_STORAGE_BUCKET'] +OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT' +GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET) +GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX) def test_labels(capsys): @@ -271,3 +276,20 @@ def test_detect_crop_hints_http(capsys): detect.detect_crop_hints_uri(uri.format(BUCKET)) out, _ = capsys.readouterr() assert 'bounds: (0,0)' in out + + +def test_async_detect_document(capsys): + storage_client = storage.Client() + bucket = storage_client.get_bucket(BUCKET) + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0 + + detect.async_detect_document( + gcs_source_uri=GCS_SOURCE_URI, + gcs_destination_uri=GCS_DESTINATION_URI) + out, _ = capsys.readouterr() + + assert 'Hodge conjecture' in out + assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 3 + + for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX): + blob.delete() diff --git a/vision/cloud-client/detect/requirements.txt b/vision/cloud-client/detect/requirements.txt index a6c769cef90d..c7b2576f206c 100644 --- a/vision/cloud-client/detect/requirements.txt +++ b/vision/cloud-client/detect/requirements.txt @@ -1,2 +1,2 @@ -google-cloud-vision==0.31.0 +google-cloud-vision==0.32.0 google-cloud-storage==1.6.0 From d633a51d809b3dcaccfd8d8a08391b70f0c33567 Mon Sep 17 00:00:00 2001 From: nnegrey Date: Fri, 15 Jun 2018 09:27:17 -0700 Subject: [PATCH 2/2] Remove blank spaces and fragment --- vision/cloud-client/detect/README.rst | 2 +- vision/cloud-client/detect/detect.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/vision/cloud-client/detect/README.rst b/vision/cloud-client/detect/README.rst index b63421f8472b..0f6f5003c1aa 100644 --- a/vision/cloud-client/detect/README.rst +++ b/vision/cloud-client/detect/README.rst @@ -94,7 +94,7 @@ To run this sample: python detect.py web-uri http://wheresgus.com/dog.JPG python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg - python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/ + python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/ For more information, the documentation at https://cloud.google.com/vision/docs. diff --git a/vision/cloud-client/detect/detect.py b/vision/cloud-client/detect/detect.py index fd491e908638..7a15430d625f 100644 --- a/vision/cloud-client/detect/detect.py +++ b/vision/cloud-client/detect/detect.py @@ -25,7 +25,7 @@ python detect.py web-geo ./resources/city.jpg python detect.py faces-uri gs://your-bucket/file.jpg python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \ - gs://BUCKET_NAME/PREFIX/ +gs://BUCKET_NAME/PREFIX/ For more information, the documentation at https://cloud.google.com/vision/docs. @@ -648,7 +648,6 @@ def async_detect_document(gcs_source_uri, gcs_destination_uri): mime_type = 'application/pdf' # How many pages should be grouped into each json output file. - # With a file of 5 pages batch_size = 2 client = vision.ImageAnnotatorClient()