Skip to content

Commit

Permalink
Move ocr pdf/tiff samples to GA (#1522)
Browse files Browse the repository at this point in the history
* Move ocr pdf/tiff samples to GA

* Remove blank spaces and fragment
  • Loading branch information
nnegrey authored and andrewsg committed Jun 15, 2018
1 parent b734ad2 commit 785b2b7
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 4 deletions.
6 changes: 4 additions & 2 deletions vision/cloud-client/detect/README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ To run this sample:
$ python detect.py
usage: detect.py [-h]
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri}
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri}
...
This application demonstrates how to perform basic operations with the
Expand All @@ -94,12 +94,13 @@ To run this sample:
python detect.py web-uri http://wheresgus.com/dog.JPG
python detect.py web-geo ./resources/city.jpg
python detect.py faces-uri gs://your-bucket/file.jpg
python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf gs://BUCKET_NAME/PREFIX/
For more information, see the documentation at
https://cloud.google.com/vision/docs.
positional arguments:
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri}
{faces,faces-uri,labels,labels-uri,landmarks,landmarks-uri,text,text-uri,logos,logos-uri,safe-search,safe-search-uri,properties,properties-uri,web,web-uri,web-geo,web-geo-uri,crophints,crophints-uri,document,document-uri,ocr-uri}
faces Detects faces in an image.
faces-uri Detects faces in the file located in Google Cloud
Storage or the web.
Expand Down Expand Up @@ -135,6 +136,7 @@ To run this sample:
document Detects document features in an image.
document-uri Detects document features in the file located in
Google Cloud Storage.
ocr-uri OCR with PDF/TIFF as source files on GCS
optional arguments:
-h, --help show this help message and exit
Expand Down
84 changes: 83 additions & 1 deletion vision/cloud-client/detect/detect.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,20 @@
python detect.py web-uri http://wheresgus.com/dog.JPG
python detect.py web-geo ./resources/city.jpg
python detect.py faces-uri gs://your-bucket/file.jpg
python detect.py ocr-uri gs://python-docs-samples-tests/HodgeConj.pdf \
gs://BUCKET_NAME/PREFIX/
For more information, see the documentation at
https://cloud.google.com/vision/docs.
"""

import argparse
import io
import re

from google.cloud import storage
from google.cloud import vision
from google.protobuf import json_format


# [START def_detect_faces]
Expand Down Expand Up @@ -636,6 +641,76 @@ def detect_document_uri(uri):
# [END def_detect_document_uri]


# [START vision_async_detect_document_ocr]
def async_detect_document(gcs_source_uri, gcs_destination_uri):
    """OCR with PDF/TIFF as source files on GCS"""
    # NOTE: the docstring above is deliberately kept to one line; it is also
    # passed as the help text of the `ocr-uri` sub-command.
    #
    # Args:
    #   gcs_source_uri: gs:// URI of the document to run OCR on. The MIME
    #       type below is fixed to 'application/pdf'.
    #   gcs_destination_uri: gs://BUCKET/PREFIX URI under which the service
    #       writes its JSON output files.
    #
    # Raises:
    #   ValueError: if gcs_destination_uri is not of the form
    #       gs://BUCKET/PREFIX.
    #   RuntimeError: if no output file is found under the destination
    #       prefix once the operation has finished.

    # Validate the destination before starting the long-running operation,
    # so a malformed URI fails fast instead of after the OCR work is done.
    match = re.match(r'gs://([^/]+)/(.+)', gcs_destination_uri)
    if match is None:
        raise ValueError(
            'gcs_destination_uri must look like gs://BUCKET/PREFIX, '
            'got: {!r}'.format(gcs_destination_uri))
    bucket_name, prefix = match.groups()

    # Supported mime_types are: 'application/pdf' and 'image/tiff'
    mime_type = 'application/pdf'

    # How many pages should be grouped into each json output file.
    batch_size = 2

    client = vision.ImageAnnotatorClient()

    feature = vision.types.Feature(
        type=vision.enums.Feature.Type.DOCUMENT_TEXT_DETECTION)

    gcs_source = vision.types.GcsSource(uri=gcs_source_uri)
    input_config = vision.types.InputConfig(
        gcs_source=gcs_source, mime_type=mime_type)

    gcs_destination = vision.types.GcsDestination(uri=gcs_destination_uri)
    output_config = vision.types.OutputConfig(
        gcs_destination=gcs_destination, batch_size=batch_size)

    async_request = vision.types.AsyncAnnotateFileRequest(
        features=[feature], input_config=input_config,
        output_config=output_config)

    operation = client.async_batch_annotate_files(
        requests=[async_request])

    print('Waiting for the operation to finish.')
    operation.result(timeout=180)

    # Once the request has completed and the output has been
    # written to GCS, we can list all the output files.
    storage_client = storage.Client()

    # Pass the bucket name positionally so the call does not depend on the
    # keyword name of the parameter.
    bucket = storage_client.get_bucket(bucket_name)

    # List objects with the given prefix, skipping "folder" placeholder
    # objects (names ending in '/'), which carry no OCR result.
    blob_list = [
        blob for blob in bucket.list_blobs(prefix=prefix)
        if not blob.name.endswith('/')]
    print('Output files:')
    for blob in blob_list:
        print(blob.name)

    if not blob_list:
        raise RuntimeError(
            'No output files found under {}'.format(gcs_destination_uri))

    # Process the first output file from GCS.
    # Since we specified batch_size=2, the first response contains
    # the first two pages of the input file.
    output = blob_list[0]

    json_string = output.download_as_string()
    response = json_format.Parse(
        json_string, vision.types.AnnotateFileResponse())

    # The actual response for the first page of the input file.
    first_page_response = response.responses[0]
    annotation = first_page_response.full_text_annotation

    # Here we print the full text from the first page.
    # The response contains more information:
    # annotation/pages/blocks/paragraphs/words/symbols
    # including confidence scores and bounding boxes
    print(u'Full text:\n{}'.format(
        annotation.text))
# [END vision_async_detect_document_ocr]


def run_local(args):
if args.command == 'faces':
detect_faces(args.path)
Expand Down Expand Up @@ -684,6 +759,8 @@ def run_uri(args):
detect_document_uri(args.uri)
elif args.command == 'web-geo-uri':
web_entities_include_geo_results_uri(args.uri)
elif args.command == 'ocr-uri':
async_detect_document(args.uri, args.destination_uri)


if __name__ == '__main__':
Expand Down Expand Up @@ -785,9 +862,14 @@ def run_uri(args):
'document-uri', help=detect_document_uri.__doc__)
document_uri_parser.add_argument('uri')

ocr_uri_parser = subparsers.add_parser(
'ocr-uri', help=async_detect_document.__doc__)
ocr_uri_parser.add_argument('uri')
ocr_uri_parser.add_argument('destination_uri')

args = parser.parse_args()

if ('uri' in args.command):
if 'uri' in args.command:
run_uri(args)
else:
run_local(args)
22 changes: 22 additions & 0 deletions vision/cloud-client/detect/detect_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@

import os

from google.cloud import storage

import detect

# Name of the Cloud Storage bucket used by the tests; read from the
# CLOUD_STORAGE_BUCKET environment variable (KeyError if it is unset).
BUCKET = os.environ['CLOUD_STORAGE_BUCKET']
# Object-name prefix under which the async OCR test looks for output.
OUTPUT_PREFIX = 'OCR_PDF_TEST_OUTPUT'
# Source PDF for the async OCR test, addressed inside the test bucket.
GCS_SOURCE_URI = 'gs://{}/HodgeConj.pdf'.format(BUCKET)
# Destination URI (bucket + prefix) handed to the async OCR sample.
GCS_DESTINATION_URI = 'gs://{}/{}/'.format(BUCKET, OUTPUT_PREFIX)


def test_labels(capsys):
Expand Down Expand Up @@ -271,3 +276,20 @@ def test_detect_crop_hints_http(capsys):
detect.detect_crop_hints_uri(uri.format(BUCKET))
out, _ = capsys.readouterr()
assert 'bounds: (0,0)' in out


def test_async_detect_document(capsys):
    """Run the async PDF OCR sample end to end and check its output.

    Verifies that the sample prints text from the source document and that
    it leaves exactly three output objects under OUTPUT_PREFIX. The output
    objects are deleted afterwards even when an assertion fails, so a
    failed run does not break the clean-prefix precondition of the next run.
    """
    storage_client = storage.Client()
    bucket = storage_client.get_bucket(BUCKET)
    # Precondition: nothing is left under the output prefix.
    assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 0

    try:
        detect.async_detect_document(
            gcs_source_uri=GCS_SOURCE_URI,
            gcs_destination_uri=GCS_DESTINATION_URI)
        out, _ = capsys.readouterr()

        assert 'Hodge conjecture' in out
        assert len(list(bucket.list_blobs(prefix=OUTPUT_PREFIX))) == 3
    finally:
        # Always remove whatever the sample wrote, pass or fail.
        for blob in bucket.list_blobs(prefix=OUTPUT_PREFIX):
            blob.delete()
2 changes: 1 addition & 1 deletion vision/cloud-client/detect/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
google-cloud-vision==0.31.0
google-cloud-vision==0.32.0
google-cloud-storage==1.6.0

0 comments on commit 785b2b7

Please sign in to comment.