Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[form recognizer] add repr #11150

Merged
merged 6 commits into from
Apr 30, 2020
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,11 @@ def __init__(self, **kwargs):
self.page_range = kwargs.get("page_range", None)
self.pages = kwargs.get("pages", None)

def __repr__(self):
return "RecognizedForm(form_type={}, fields={}, page_range={}, pages={})".format(
iscai-msft marked this conversation as resolved.
Show resolved Hide resolved
self.form_type, repr(self.fields), repr(self.page_range), repr(self.pages)
)


class USReceipt(object): # pylint: disable=too-many-instance-attributes
"""Extracted fields found on the US sales receipt. Provides
Expand All @@ -182,7 +187,7 @@ class USReceipt(object): # pylint: disable=too-many-instance-attributes
:ivar list[~azure.ai.formrecognizer.USReceiptItem] receipt_items:
The purchased items found on the receipt.
:ivar ~azure.ai.formrecognizer.FormField subtotal:
The subtotal found on the receipt.
The subtotal found on the receipt
:ivar ~azure.ai.formrecognizer.FormField tax:
The tax value found on the receipt.
:ivar ~azure.ai.formrecognizer.FormField tip:
Expand Down Expand Up @@ -224,6 +229,17 @@ def __init__(self, **kwargs):
self.form_type = kwargs.get("form_type", None)
self.receipt_locale = kwargs.get("receipt_locale", "en-US")

def __repr__(self):
return "USReceipt(merchant_address={}, merchant_name={}, merchant_phone_number={}, " \
"receipt_type={}, receipt_items={}, subtotal={}, tax={}, tip={}, total={}, "\
"transaction_date={}, transaction_time={}, fields={}, page_range={}, pages={}, " \
"form_type={}, receipt_locale={})".format(
repr(self.merchant_address), repr(self.merchant_name), repr(self.merchant_phone_number),
repr(self.receipt_type), repr(self.receipt_items), repr(self.subtotal), repr(self.tax),
repr(self.tip), repr(self.total), repr(self.transaction_date), repr(self.transaction_time),
repr(self.fields), repr(self.page_range), repr(self.pages), self.form_type, self.receipt_locale
)


class FormField(object):
"""Represents a field recognized in an input form.
Expand Down Expand Up @@ -263,6 +279,7 @@ def _from_generated(cls, field, value, read_result):
page_number=value.page if value else None,
)


@classmethod
def _from_generated_unlabeled(cls, field, idx, page, read_result):
return cls(
Expand All @@ -274,6 +291,11 @@ def _from_generated_unlabeled(cls, field, idx, page, read_result):
page_number=page,
)

def __repr__(self):
return "FormField(label_data={}, value_data={}, name={}, value={}, confidence={}, page_number={})".format(
repr(self.label_data), repr(self.value_data), self.name, repr(self.value), self.confidence, self.page_number
)


class FieldText(FormContent):
"""Represents the text that is part of a form field. This includes
Expand Down Expand Up @@ -328,6 +350,11 @@ def _from_generated_unlabeled(cls, field, page, read_result):
text_content=get_elements(field, read_result) if field.elements else None
)

def __repr__(self):
    """Return a debug string showing page_number, text, bounding_box and text_content.

    All attributes use the ``!r`` conversion so that ``text`` is quoted;
    unquoted text containing commas or parentheses made the output ambiguous.
    """
    return "FieldText(page_number={!r}, text={!r}, bounding_box={!r}, text_content={!r})".format(
        self.page_number, self.text, self.bounding_box, self.text_content
    )


class FormPage(object):
"""Represents a page recognized from the input document. Contains lines,
Expand Down Expand Up @@ -377,6 +404,11 @@ def _from_generated(cls, read_result):
lines=[FormLine._from_generated(line, page=page.page) for line in page.lines] if page.lines else None
) for page in read_result]

def __repr__(self):
return "FormPage(page_number={}, text_angle={}, width={}, height={}, unit={}, tables={}, lines={})".format(
self.page_number, self.text_angle, self.width, self.height, self.unit, repr(self.tables), repr(self.lines)
)


class FormLine(FormContent):
"""An object representing an extracted line of text.
Expand Down Expand Up @@ -411,6 +443,10 @@ def _from_generated(cls, line, page):
words=[FormWord._from_generated(word, page) for word in line.words] if line.words else None
)

def __repr__(self):
    """Return a debug string showing text, bounding_box, words and page_number.

    All attributes use the ``!r`` conversion so that ``text`` is quoted and
    its boundaries are unambiguous in the output.
    """
    return "FormLine(text={!r}, bounding_box={!r}, words={!r}, page_number={!r})".format(
        self.text, self.bounding_box, self.words, self.page_number
    )

class FormWord(FormContent):
"""Represents a word recognized from the input document.
Expand Down Expand Up @@ -445,6 +481,11 @@ def _from_generated(cls, word, page):
page_number=page
)

def __repr__(self):
    """Return a debug string showing text, bounding_box, confidence and page_number.

    All attributes use the ``!r`` conversion so that ``text`` is quoted and
    a word such as ``None`` is distinguishable from a missing value.
    """
    return "FormWord(text={!r}, bounding_box={!r}, confidence={!r}, page_number={!r})".format(
        self.text, self.bounding_box, self.confidence, self.page_number
    )


class USReceiptType(object):
"""The type of the analyzed US receipt and the confidence
Expand All @@ -466,6 +507,9 @@ def _from_generated(cls, item):
type=item.value_string,
confidence=item.confidence or 1.0) if item else None

def __repr__(self):
return "USReceiptType(type={}, confidence={})".format(self.type, self.confidence)


class USReceiptItem(object):
"""A receipt item on a US sales receipt.
Expand Down Expand Up @@ -500,6 +544,11 @@ def _from_generated(cls, items, read_result):
except AttributeError:
return []

def __repr__(self):
return "USReceiptItem(name={}, quantity={}, price={}, total_price={})".format(
repr(self.name), repr(self.quantity), repr(self.price), repr(self.total_price)
)


class FormTable(object):
"""Information about the extracted table contained on a page.
Expand All @@ -517,6 +566,11 @@ def __init__(self, **kwargs):
self.row_count = kwargs.get("row_count", None)
self.column_count = kwargs.get("column_count", None)

def __repr__(self):
return "FormTable(cells={}, row_count={}, column_count={})".format(
repr(self.cells), self.row_count, self.column_count
)


class FormTableCell(FormContent):
"""Represents a cell contained in a table recognized from the input document.
Expand Down Expand Up @@ -576,6 +630,13 @@ def _from_generated(cls, cell, page, read_result):
text_content=get_elements(cell, read_result) if cell.elements else None
)

def __repr__(self):
    """Return a debug string listing the cell's text, position, spans,
    bounding box, confidence, header/footer flags, page number and text content.

    All attributes use the ``!r`` conversion so that ``text`` is quoted and
    ``text_content`` is rendered the same way as in the sibling models.
    """
    return (
        "FormTableCell(text={!r}, row_index={!r}, column_index={!r}, row_span={!r}, column_span={!r}, "
        "bounding_box={!r}, confidence={!r}, is_header={!r}, is_footer={!r}, page_number={!r}, "
        "text_content={!r})"
    ).format(
        self.text, self.row_index, self.column_index, self.row_span, self.column_span,
        self.bounding_box, self.confidence, self.is_header, self.is_footer, self.page_number,
        self.text_content
    )


class CustomFormModel(object):
"""Represents a model trained from custom forms.
Expand Down Expand Up @@ -621,6 +682,13 @@ def _from_generated(cls, model):
if model.train_result else None
)

def __repr__(self):
return "CustomFormModel(model_id={}, status={}, created_on={}, last_modified={}, models={}, " \
"errors={}, training_documents={})".format(
self.model_id, self.status, self.created_on, self.last_modified, repr(self.models),
repr(self.errors), repr(self.training_documents)
)


class CustomFormSubModel(object):
"""Represents a submodel that extracts fields from a specific type of form.
Expand Down Expand Up @@ -656,6 +724,11 @@ def _from_generated_labeled(cls, model):
form_type="form-" + model.model_info.model_id
)] if model.train_result else None

def __repr__(self):
return "CustomFormSubModel(accuracy={}, fields={}, form_type={})".format(
self.accuracy, repr(self.fields), self.form_type
)


class CustomFormModelField(object):
"""A field that the model will extract from forms it analyzes.
Expand Down Expand Up @@ -685,6 +758,11 @@ def _from_generated_unlabeled(cls, fields):
) for idx, field_name in enumerate(fields)
}

def __repr__(self):
return "CustomFormModelField(label={}, name={}, accuracy={})".format(
self.label, self.name, self.accuracy
)


class TrainingDocumentInfo(object):
"""Report for an individual document used for training
Expand Down Expand Up @@ -717,6 +795,11 @@ def _from_generated(cls, train_result):
errors=FormRecognizerError._from_generated(doc.errors)
) for doc in train_result.training_documents] if train_result.training_documents else None

def __repr__(self):
return "TrainingDocumentInfo(document_name={}, status={}, page_count={}, errors={})".format(
self.document_name, self.status, self.page_count, repr(self.errors)
)


class FormRecognizerError(object):
"""Represents an error that occurred while training.
Expand All @@ -733,6 +816,9 @@ def __init__(self, **kwargs):
def _from_generated(cls, err):
return [cls(code=error.code, message=error.message) for error in err] if err else []

def __repr__(self):
return "FormRecognizerError(code={}, message={})".format(self.code, self.message)


class CustomFormModelInfo(object):
"""Custom model information.
Expand Down Expand Up @@ -762,6 +848,11 @@ def _from_generated(cls, model):
last_modified=model.last_updated_date_time
)

def __repr__(self):
return "CustomFormModelInfo(model_id={}, status={}, created_on={}, last_modified={})".format(
self.model_id, self.status, self.created_on, self.last_modified
)


class AccountProperties(object):
"""Summary of all the custom models on the account.
Expand All @@ -780,3 +871,8 @@ def _from_generated(cls, model):
custom_model_count=model.count,
custom_model_limit=model.limit,
)

def __repr__(self):
return "AccountProperties(custom_model_count={}, custom_model_limit={})".format(
self.custom_model_count, self.custom_model_limit
)
Loading