From c7ff91d10ae80372d472487c0d292a2eaa99f498 Mon Sep 17 00:00:00 2001 From: catalinaperalta Date: Wed, 29 Sep 2021 21:16:56 -0400 Subject: [PATCH] [formrecognizer] Update doc strings for v3 (#20920) * update docs based on Paul's feedback * improve bounding region docs * sample fixes * review feedback * spelling --- .../_document_analysis_client.py | 12 ++++++------ .../azure/ai/formrecognizer/_models.py | 3 ++- .../aio/_document_analysis_client_async.py | 12 ++++++------ .../sample_analyze_custom_documents.py | 18 ++++++++---------- .../v3.2-beta/sample_analyze_document.py | 14 +++++++------- .../samples/v3.2-beta/sample_analyze_layout.py | 10 +++++----- 6 files changed, 34 insertions(+), 35 deletions(-) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_document_analysis_client.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_document_analysis_client.py index 4423767f03bc..b71b519195e1 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_document_analysis_client.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_document_analysis_client.py @@ -86,8 +86,8 @@ def begin_analyze_document(self, model, document, **kwargs): :keyword str pages: Custom page numbers for multi-page documents(PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like `pages="1-3, 5-6"`. Separate each page number or range with a comma. - :keyword str locale: Locale of the document. Supported locales include: en-US, en-AU, en-CA, en-GB, - and en-IN. + :keyword str locale: Locale hint of the input document. + See supported locales here: https://aka.ms/azsdk/formrecognizer/supportedlocales. :keyword str continuation_token: A continuation token to restart a poller from a saved state. :return: An instance of an LROPoller. Call `result()` on the poller object to return a :class:`~azure.ai.formrecognizer.AnalyzeResult`. @@ -137,13 +137,13 @@ def begin_analyze_document_from_url(self, model, document_url, **kwargs): Use this to specify the custom model ID or prebuilt model ID. Prebuilt model IDs to use are: "prebuilt-receipt", "prebuilt-invoice", "prebuilt-idDocument", "prebuilt-businessCard", "prebuilt-document", "prebuilt-layout". - :param str document_url: The URL of the document to analyze. The input must be a valid, encoded URL - of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP. + :param str document_url: The URL of the document to analyze. The input must be a valid, encoded, and + publicly accessible URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP. :keyword str pages: Custom page numbers for multi-page documents(PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like `pages="1-3, 5-6"`. Separate each page number or range with a comma. - :keyword str locale: Locale of the document. Supported locales include: en-US, en-AU, en-CA, en-GB, - and en-IN. + :keyword str locale: Locale hint of the input document. + See supported locales here: https://aka.ms/azsdk/formrecognizer/supportedlocales. :keyword str continuation_token: A continuation token to restart a poller from a saved state. :return: An instance of an LROPoller. Call `result()` on the poller object to return a :class:`~azure.ai.formrecognizer.AnalyzeResult`. diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py index 3f5fc6cfade1..70c2b6ff6fd2 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/_models.py @@ -2071,7 +2071,8 @@ class BoundingRegion(object): :ivar list[~azure.ai.formrecognizer.Point] bounding_box: A list of 4 points representing the quadrilateral bounding box that outlines the text. The points are listed in clockwise - order: top-left, top-right, bottom-right, bottom-left. + order relative to the text orientation: top-left, top-right, + bottom-right, bottom-left. Units are in pixels for images and inches for PDF. :ivar int page_number: The 1-based number of the page in which this content is present. diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_document_analysis_client_async.py b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_document_analysis_client_async.py index dae6a5436841..f5ebaa29eb3f 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_document_analysis_client_async.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/azure/ai/formrecognizer/aio/_document_analysis_client_async.py @@ -92,8 +92,8 @@ async def begin_analyze_document( :keyword str pages: Custom page numbers for multi-page documents(PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like `pages="1-3, 5-6"`. Separate each page number or range with a comma. - :keyword str locale: Locale of the document. Supported locales include: en-US, en-AU, en-CA, en-GB, - and en-IN. + :keyword str locale: Locale hint of the input document. + See supported locales here: https://aka.ms/azsdk/formrecognizer/supportedlocales. :keyword str continuation_token: A continuation token to restart a poller from a saved state. :return: An instance of an AsyncLROPoller. Call `result()` on the poller object to return a :class:`~azure.ai.formrecognizer.AnalyzeResult`. @@ -144,13 +144,13 @@ async def begin_analyze_document_from_url( Use this to specify the custom model ID or prebuilt model ID. Prebuilt model IDs to use are: "prebuilt-receipt", "prebuilt-invoice", "prebuilt-idDocument", "prebuilt-businessCard", "prebuilt-document", "prebuilt-layout". - :param str document_url: The URL of the document to analyze. The input must be a valid, encoded URL - of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP. + :param str document_url: The URL of the document to analyze. The input must be a valid, encoded, and + publicly accessible URL of one of the supported formats: JPEG, PNG, PDF, TIFF, or BMP. :keyword str pages: Custom page numbers for multi-page documents(PDF/TIFF). Input the page numbers and/or ranges of pages you want to get in the result. For a range of pages, use a hyphen, like `pages="1-3, 5-6"`. Separate each page number or range with a comma. - :keyword str locale: Locale of the document. Supported locales include: en-US, en-AU, en-CA, en-GB, - and en-IN. + :keyword str locale: Locale hint of the input document. + See supported locales here: https://aka.ms/azsdk/formrecognizer/supportedlocales. :keyword str continuation_token: A continuation token to restart a poller from a saved state. :return: An instance of an AsyncLROPoller. Call `result()` on the poller object to return a :class:`~azure.ai.formrecognizer.AnalyzeResult`. diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_custom_documents.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_custom_documents.py index 3a6dd7c403ca..1688dd30a3c4 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_custom_documents.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_custom_documents.py @@ -61,8 +61,8 @@ def analyze_custom_documents(custom_model_id): for idx, document in enumerate(result.documents): print("--------Analyzing document #{}--------".format(idx + 1)) print("Document has type {}".format(document.doc_type)) - print("Document has document type confidence {}".format(document.confidence)) - print("Document was analyzed with model with ID {}".format(result.model_id)) + print("Document has confidence {}".format(document.confidence)) + print("Document was analyzed by model with ID {}".format(result.model_id)) for name, field in document.fields.items(): field_value = field.value if field.value else field.content print("......found field of type '{}' with value '{}' and with confidence {}".format(field.value_type, field_value, field.confidence)) @@ -79,14 +79,12 @@ def analyze_custom_documents(custom_model_id): word.content, word.confidence ) ) - if page.selection_marks: - print("\nSelection marks found on page {}".format(page.page_number)) - for selection_mark in page.selection_marks: - print( - "...Selection mark is '{}' and has a confidence of {}".format( - selection_mark.state, selection_mark.confidence - ) + for selection_mark in page.selection_marks: + print( + "...Selection mark is '{}' and has a confidence of {}".format( + selection_mark.state, selection_mark.confidence ) + ) for i, table in enumerate(result.tables): print("\nTable {} can be found on page:".format(i + 1)) @@ -94,7 +92,7 @@ def analyze_custom_documents(custom_model_id): print("...{}".format(i + 1, region.page_number)) for cell in table.cells: print( - "...Cell[{}][{}] has text '{}'".format( + "...Cell[{}][{}] has content '{}'".format( cell.row_index, cell.column_index, cell.content ) ) diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_document.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_document.py index 8a766a41199f..21f96b83e607 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_document.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_document.py @@ -70,8 +70,8 @@ def analyze_document(): ) ) - for idx, page in enumerate(result.pages): - print("----Analyzing document from page #{}----".format(idx + 1)) + for page in result.pages: + print("----Analyzing document from page #{}----".format(page.page_number)) print( "Page has width: {} and height: {}, measured with unit: {}".format( page.width, page.height, page.unit @@ -80,7 +80,7 @@ def analyze_document(): for line_idx, line in enumerate(page.lines): print( - "Line # {} has text content '{}' within bounding box '{}'".format( + "...Line # {} has text content '{}' within bounding box '{}'".format( line_idx, line.content, format_bounding_box(line.bounding_box), @@ -96,7 +96,7 @@ def analyze_document(): for selection_mark in page.selection_marks: print( - "Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format( + "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format( selection_mark.state, format_bounding_box(selection_mark.bounding_box), selection_mark.confidence, @@ -119,7 +119,7 @@ def analyze_document(): ) for cell in table.cells: print( - "...Cell[{}][{}] has text '{}'".format( + "...Cell[{}][{}] has content '{}'".format( cell.row_index, cell.column_index, cell.content, @@ -134,14 +134,14 @@ def analyze_document(): ) print("----Entities found in document----") - for idx, entity in enumerate(result.entities): + for entity in result.entities: print("Entity of category '{}' with sub-category '{}'".format(entity.category, entity.sub_category)) print("...has content '{}'".format(entity.content)) print("...within '{}' bounding regions".format(format_bounding_region(entity.bounding_regions))) print("...with confidence {}".format(entity.confidence)) print("----Key-value pairs found in document----") - for idx, kv_pair in enumerate(result.key_value_pairs): + for kv_pair in result.key_value_pairs: if kv_pair.key: print( "Key '{}' found within '{}' bounding regions".format( diff --git a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_layout.py b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_layout.py index 2822a83c525a..c48216a03e52 100644 --- a/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_layout.py +++ b/sdk/formrecognizer/azure-ai-formrecognizer/samples/v3.2-beta/sample_analyze_layout.py @@ -66,8 +66,8 @@ def analyze_layout(): ) ) - for idx, page in enumerate(result.pages): - print("----Analyzing layout from page #{}----".format(idx + 1)) + for page in result.pages: + print("----Analyzing layout from page #{}----".format(page.page_number)) print( "Page has width: {} and height: {}, measured with unit: {}".format( page.width, page.height, page.unit @@ -76,7 +76,7 @@ def analyze_layout(): for line_idx, line in enumerate(page.lines): print( - "Line # {} has text content '{}' within bounding box '{}'".format( + "...Line # {} has text content '{}' within bounding box '{}'".format( line_idx, line.content, format_bounding_box(line.bounding_box), @@ -92,7 +92,7 @@ def analyze_layout(): for selection_mark in page.selection_marks: print( - "Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format( + "...Selection mark is '{}' within bounding box '{}' and has a confidence of {}".format( selection_mark.state, format_bounding_box(selection_mark.bounding_box), selection_mark.confidence, @@ -115,7 +115,7 @@ def analyze_layout(): ) for cell in table.cells: print( - "...Cell[{}][{}] has text '{}'".format( + "...Cell[{}][{}] has content '{}'".format( cell.row_index, cell.column_index, cell.content,