Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move ocr pdf/tiff samples to GA #1522

Merged
merged 2 commits into from
Jun 15, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions vision/cloud-client/detect/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ To run this sample:
$ python detect.py

usage: detect.py [-h]
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri}
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri}
...

This application demonstrates how to perform basic operations with the
Expand All @@ -94,12 +94,13 @@ To run this sample:
python detect.py web-uri http://wheresgus.com/dog.JPG
python detect.py web-geo ./resources/city.jpg
python detect.py faces-uri gs://your-bucket/file.jpg
python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/

For more information, the documentation at
https://cloud.google.com/vision/docs.

positional arguments:
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri}
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri}
faces Detects faces in an image.
faces-uri Detects faces in the file located in Google Cloud
Storage or the web.
Expand Down Expand Up @@ -135,6 +136,7 @@ To run this sample:
document Detects document features in an image.
document-uri Detects document features in the file located in
Google Cloud Storage.
ocr-uri OCR with PDF/TIFF as source files on GCS

optional arguments:
-h, --help show this help message and exit
Expand Down
84 changes: 83 additions & 1 deletion vision/cloud-client/detect/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,20 @@
python detect.py web-uri http://wheresgus.com/dog.JPG
python detect.py web-geo ./resources/city.jpg
python detect.py faces-uri gs://your-bucket/file.jpg
python detect_pdf.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \
gs://BUCKET_NAME/PREFIX/

For more information, the documentation at
https://cloud.google.com/vision/docs.
"""

import argparse
import io
import re

from google.cloud import storage
from google.cloud import vision
from google.protobuf import json_format


# [START def_detect_faces]
Expand Down Expand Up @@ -636,6 +641,76 @@ def detect_document_uri(uri):
# [END def_detect_document_uri]


# [START vision_async_detect_document_ocr]
def async_detect_document(gcs_source_uri, gcs_destination_uri):
"""OCR with PDF/TIFF as source files on GCS"""
# Supported mime_types are: 'application/pdf' and 'image/tiff'
mime_type = 'application/pdf'

# How many pages should be grouped into each json output file.
batch_size = 2

client = vision.ImageAnnotatorClient()

feature = vision.types.Feature(
type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)

gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
input_config = vision.types.InputConfig(
gcs_source=gcs_source, mime_type=mime_type)

gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
output_config = vision.types.OutputConfig(
gcs_destination=gcs_destination, batch_size=batch_size)

async_request = vision.types.AsyncAnnotateFileRequest(
features=[feature], input_config=input_config,
output_config=output_config)

operation = client.async_batch_annotate_files(
requests=[async_request])

print('Waiting for the operation to finish.')
operation.result(timeout=180)

# Once the request has completed and the output has been
# written to GCS, we can list all the output files.
storage_client = storage.Client()

match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
bucket_name = match.group(1)
prefix = match.group(2)

bucket = storage_client.get_bucket(bucket_name=bucket_name)

# List objects with the given prefix.
blob_list = list(bucket.list_blobs(prefix=prefix))
print('Output files:')
for blob in blob_list:
print(blob.name)

# Process the first output file from GCS.
# Since we specified batch_size=2, the first response contains
# the first two pages of the input file.
output = blob_list[0]

json_string = output.download_as_string()
response = json_format.Parse(
json_string, vision.types.AnnotateFileResponse())

# The actual response for the first page of the input file.
first_page_response = response.responses[0]
annotation = first_page_response.full_text_annotation

# Here we print the full text from the first page.
# The response contains more information:
# annotation/pages/blocks/paragraphs/words/symbols
# including confidence scores and bounding boxes
print(u'Full text:\n{}'.format(
annotation.text))
# [END vision_async_detect_document_ocr]


def run_local(args):
if args.command == 'faces':
detect_faces(args.path)
Expand Down Expand Up @@ -684,6 +759,8 @@ def run_uri(args):
detect_document_uri(args.uri)
elif args.command == 'web-geo-uri':
web_entities_include_geo_results_uri(args.uri)
elif args.command == 'ocr-uri':
async_detect_document(args.uri, args.destination_uri)


if __name__ == '__main__':
Expand Down Expand Up @@ -785,9 +862,14 @@ def run_uri(args):
'document-uri', help=detect_document_uri.__doc__)
document_uri_parser.add_argument('uri')

ocr_uri_parser = subparsers.add_parser(
'ocr-uri', help=async_detect_document.__doc__)
ocr_uri_parser.add_argument('uri')
ocr_uri_parser.add_argument('destination_uri')

args = parser.parse_args()

if ('uri' in args.command):
if 'uri' in args.command:
run_uri(args)
else:
run_local(args)
22 changes: 22 additions & 0 deletions vision/cloud-client/detect/detect_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@

import os

from google.cloud import storage

import detect

BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT'
GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET)
GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX)


def test_labels(capsys):
Expand Down Expand Up @@ -271,3 +276,20 @@ def test_detect_crop_hints_http(capsys):
detect.detect_crop_hints_uri(uri.format(BUCKET))
out, _ = capsys.readouterr()
assert 'bounds: (0,0)' in out


def test_async_detect_document(capsys):
storage_client = storage.Client()
bucket = storage_client.get_bucket(BUCKET)
assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0

detect.async_detect_document(
gcs_source_uri=GCS_SOURCE_URI,
gcs_destination_uri=GCS_DESTINATION_URI)
out, _ = capsys.readouterr()

assert 'Hodge conjecture' in out
assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 3

for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
blob.delete()
2 changes: 1 addition & 1 deletion vision/cloud-client/detect/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
google-cloud-vision==0.31.0
google-cloud-vision==0.32.0
google-cloud-storage==1.6.0