Skip to content

Commit

Permalink
[formrecognizer] handle unsupervised pages better with service bug (#11017)
Browse files Browse the repository at this point in the history

* handle unsupervised pages better

* python 2 oops
  • Loading branch information
kristapratico authored Apr 23, 2020
1 parent 269715f commit a77ebda
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,20 +15,23 @@
def get_elements(field, read_result):
text_elements = []

for item in field.elements:
nums = [int(s) for s in re.findall(r"\d+", item)]
read = nums[0]
line = nums[1]
if len(nums) == 3:
word = nums[2]
ocr_word = read_result[read].lines[line].words[word]
extracted_word = FormWord._from_generated(ocr_word, page=read + 1)
text_elements.append(extracted_word)
continue
ocr_line = read_result[read].lines[line]
extracted_line = FormLine._from_generated(ocr_line, page=read + 1)
text_elements.append(extracted_line)
return text_elements
try:
for item in field.elements:
nums = [int(s) for s in re.findall(r"\d+", item)]
read = nums[0]
line = nums[1]
if len(nums) == 3:
word = nums[2]
ocr_word = read_result[read].lines[line].words[word]
extracted_word = FormWord._from_generated(ocr_word, page=read + 1)
text_elements.append(extracted_word)
continue
ocr_line = read_result[read].lines[line]
extracted_line = FormLine._from_generated(ocr_line, page=read + 1)
text_elements.append(extracted_line)
return text_elements
except IndexError:
return None # https://github.com/Azure/azure-sdk-for-python/issues/11014


def get_field_value(field, value, read_result): # pylint: disable=too-many-return-statements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@ def prepare_content_result(response):
read_result = response.analyze_result.read_results
page_result = response.analyze_result.page_results

for page in read_result:
for idx, page in enumerate(read_result):
form_page = FormPage(
page_number=page.page,
text_angle=page.angle,
width=page.width,
height=page.height,
unit=page.unit,
lines=[FormLine._from_generated(line, page=page.page) for line in page.lines] if page.lines else None,
tables=prepare_tables(page_result[page.page-1], read_result),
tables=prepare_tables(page_result[idx], read_result),
)
pages.append(form_page)
return pages
Expand All @@ -126,7 +126,7 @@ def prepare_unlabeled_result(response):
read_result = response.analyze_result.read_results
page_result = response.analyze_result.page_results

for page in page_result:
for index, page in enumerate(page_result):
unlabeled_fields = [FormField._from_generated_unlabeled(field, idx, page.page, read_result)
for idx, field in enumerate(page.key_value_pairs)] if page.key_value_pairs else None
if unlabeled_fields:
Expand All @@ -138,7 +138,7 @@ def prepare_unlabeled_result(response):
),
fields=unlabeled_fields,
form_type="form-" + str(page.cluster_id) if page.cluster_id is not None else None,
pages=[form_pages[page.page-1]]
pages=[form_pages[index]]
)
result.append(form)

Expand Down

0 comments on commit a77ebda

Please sign in to comment.