Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[textanalytics] updating custom samples #21434

Merged
merged 2 commits into from
Oct 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FILE: sample_multi_category_classify_async.py

DESCRIPTION:
This sample demonstrates how to classify documents into multiple custom categories. Here we have a few
movie plot summaries that must be categorized into movie genres like Sci-Fi, Horror, Comedy, Romance, etc.
This sample demonstrates how to classify documents into multiple custom categories. For example,
movie plot summaries can be categorized into multiple movie genres like Sci-Fi and Horror, or Comedy and Romance, etc.
Classifying documents is available as an action type through the begin_analyze_actions API.

To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities
Expand Down Expand Up @@ -40,27 +40,26 @@ async def sample_classify_document_multi_categories_async():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["MULTI_CATEGORY_CLASSIFY_PROJECT_NAME"]
deployed_model_name = os.environ["MULTI_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"..",
"./text_samples/custom_classify_sample.txt",
)
)

with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

documents = [
"In the not-too-distant future, Earth's dying sun spells the end for humanity. In a last-ditch effort to "
"save the planet, a crew of eight men and women ventures into space with a device that could revive the "
"star. However, an accident, a grave mistake and a distress beacon from a long-lost spaceship throw "
"the crew and its desperate mission into a tailspin.",

"Despite his family's generations-old ban on music, young Miguel dreams of becoming an accomplished "
"musician like his idol Ernesto de la Cruz. Desperate to prove his talent, Miguel finds himself "
"in the stunning and colorful Land of the Dead. After meeting a charming trickster named Héctor, "
"the two new friends embark on an extraordinary journey to unlock the real story behind Miguel's "
"family history"
]
async with text_analytics_client:
poller = await text_analytics_client.begin_analyze_actions(
documents,
document,
actions=[
MultiCategoryClassifyAction(
project_name=project_name,
Expand All @@ -74,11 +73,11 @@ async def sample_classify_document_multi_categories_async():
document_results = []
async for page in pages:
document_results.append(page)
for doc, classification_results in zip(documents, document_results):
for doc, classification_results in zip(document, document_results):
for classification_result in classification_results:
if not classification_result.is_error:
classifications = classification_result.classifications
print("The movie plot '{}' was classified as the following genres:\n".format(doc))
print("\nThe movie plot '{}' was classified as the following genres:\n".format(doc))
for classification in classifications:
print("'{}' with confidence score {}.".format(
classification.category, classification.confidence_score
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,22 @@ async def sample_recognize_custom_entities_async():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"]
deployed_model_name = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cool!! Is this because of the feedback the service team/arch board gave us?
Also, are these the documents the service team is using in their samples? (I haven't looked :( )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the text is quite long. And yes, taken from their sample set.

os.path.abspath(__file__),
"..",
"..",
"./text_samples/custom_entities_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

document = [
"The Grantor(s), John Smith, who also appears of record as John A. Smith, for and in consideration of "
"Ten dollars and Zero cents ($10.00) and other good and valuable consideration in hand paid, conveys, and "
"warrants to Jane Doe, the following described real estate, situated in the County of King, State of "
"Washington: Lot A, King County Short Plat Number AAAAAAAA, recorded under Recording Number AAAAAAAAA in "
"King County, Washington."
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

async with text_analytics_client:
poller = await text_analytics_client.begin_analyze_actions(
Expand All @@ -70,34 +73,16 @@ async def sample_recognize_custom_entities_async():
custom_entities_result = result[0] # first document, first result
if not custom_entities_result.is_error:
for entity in custom_entities_result.entities:
if entity.category == "Seller Name":
print("The seller of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Name":
print("The buyer of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Fee":
print("The buyer fee is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Lot Number":
print("The lot number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Short Plat Number":
print("The short plat number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Recording Number":
print("The recording number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
print(
"Entity '{}' has category '{}' with confidence score of '{}'".format(
entity.text, entity.category, entity.confidence_score
)
)
else:
print("...Is an error with code '{}' and message '{}'".format(
custom_entities_result.code, custom_entities_result.message
))
)
)


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FILE: sample_single_category_classify_async.py

DESCRIPTION:
This sample demonstrates how to classify documents into a single custom category. Here we have several
support tickets that need to be classified as internet, printer, email or hardware issues.
This sample demonstrates how to classify documents into a single custom category. For example,
movie plot summaries can be categorized into a single movie genre like Sci-Fi, Horror, Comedy, Romance, etc.
Classifying documents is available as an action type through the begin_analyze_actions API.

To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities
Expand Down Expand Up @@ -40,22 +40,26 @@ async def sample_classify_document_single_category_async():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["SINGLE_CATEGORY_CLASSIFY_PROJECT_NAME"]
deployed_model_name = os.environ["SINGLE_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"..",
"./text_samples/custom_classify_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

documents = [
"My internet has stopped working. I tried resetting the router, but it just keeps blinking red.",
"I submitted 3 jobs to print but the printer is unresponsive. I can't see it under my devices either.",
"My computer will not boot. Pushing the power button does nothing - just a black screen.",
"I seem to not be receiving all my emails on time. Emails from 2 days ago show up as just received.",
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

async with text_analytics_client:
poller = await text_analytics_client.begin_analyze_actions(
documents,
document,
actions=[
SingleCategoryClassifyAction(
project_name=project_name,
Expand All @@ -70,7 +74,7 @@ async def sample_classify_document_single_category_async():
async for page in pages:
document_results.append(page)

for doc, classification_results in zip(documents, document_results):
for doc, classification_results in zip(document, document_results):
for classification_result in classification_results:
if not classification_result.is_error:
classification = classification_result.classification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FILE: sample_multi_category_classify.py

DESCRIPTION:
This sample demonstrates how to classify documents into multiple custom categories. Here we have a few
movie plot summaries that must be categorized into movie genres like Sci-Fi, Horror, Comedy, Romance, etc.
This sample demonstrates how to classify documents into multiple custom categories. For example,
movie plot summaries can be categorized into multiple movie genres like Sci-Fi and Horror, or Comedy and Romance, etc.
Classifying documents is available as an action type through the begin_analyze_actions API.

To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities
Expand Down Expand Up @@ -41,27 +41,24 @@ def sample_classify_document_multi_categories():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["MULTI_CATEGORY_CLASSIFY_PROJECT_NAME"]
deployed_model_name = os.environ["MULTI_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"./text_samples/custom_classify_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

documents = [
"In the not-too-distant future, Earth's dying sun spells the end for humanity. In a last-ditch effort to "
"save the planet, a crew of eight men and women ventures into space with a device that could revive the "
"star. However, an accident, a grave mistake and a distress beacon from a long-lost spaceship throw "
"the crew and its desperate mission into a tailspin.",

"Despite his family's generations-old ban on music, young Miguel dreams of becoming an accomplished "
"musician like his idol Ernesto de la Cruz. Desperate to prove his talent, Miguel finds himself "
"in the stunning and colorful Land of the Dead. After meeting a charming trickster named Héctor, "
"the two new friends embark on an extraordinary journey to unlock the real story behind Miguel's "
"family history"
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

poller = text_analytics_client.begin_analyze_actions(
documents,
document,
actions=[
MultiCategoryClassifyAction(
project_name=project_name,
Expand All @@ -71,11 +68,11 @@ def sample_classify_document_multi_categories():
)

document_results = poller.result()
for doc, classification_results in zip(documents, document_results):
for doc, classification_results in zip(document, document_results):
for classification_result in classification_results:
if not classification_result.is_error:
classifications = classification_result.classifications
print("The movie plot '{}' was classified as the following genres:\n".format(doc))
print("\nThe movie plot '{}' was classified as the following genres:\n".format(doc))
for classification in classifications:
print("'{}' with confidence score {}.".format(
classification.category, classification.confidence_score
Expand All @@ -87,4 +84,4 @@ def sample_classify_document_multi_categories():


if __name__ == "__main__":
sample_classify_document_multi_categories()
sample_classify_document_multi_categories()
Original file line number Diff line number Diff line change
Expand Up @@ -33,33 +33,34 @@ def sample_recognize_custom_entities():
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import (
TextAnalyticsClient,
RecognizeCustomEntitiesAction
RecognizeCustomEntitiesAction,
)

endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"]
deployed_model_name = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"./text_samples/custom_entities_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

document = [
"The Grantor(s), John Smith, who also appears of record as John A. Smith, for and in consideration of "
"Ten dollars and Zero cents ($10.00) and other good and valuable consideration in hand paid, conveys, and "
"warrants to Jane Doe, the following described real estate, situated in the County of King, State of "
"Washington: Lot A, King County Short Plat Number AAAAAAAA, recorded under Recording Number AAAAAAAAA in "
"King County, Washington."
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

poller = text_analytics_client.begin_analyze_actions(
document,
actions=[
RecognizeCustomEntitiesAction(
project_name=project_name,
deployment_name=deployed_model_name
project_name=project_name, deployment_name=deployed_model_name
),
],
)
Expand All @@ -69,35 +70,18 @@ def sample_recognize_custom_entities():
custom_entities_result = result[0] # first document, first result
if not custom_entities_result.is_error:
for entity in custom_entities_result.entities:
if entity.category == "Seller Name":
print("The seller of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Name":
print("The buyer of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Fee":
print("The buyer fee is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Lot Number":
print("The lot number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Short Plat Number":
print("The short plat number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Recording Number":
print("The recording number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
print(
"Entity '{}' has category '{}' with confidence score of '{}'".format(
entity.text, entity.category, entity.confidence_score
)
)
else:
print("...Is an error with code '{}' and message '{}'".format(
custom_entities_result.code, custom_entities_result.message
))
print(
"...Is an error with code '{}' and message '{}'".format(
custom_entities_result.code, custom_entities_result.message
)
)


if __name__ == "__main__":
sample_recognize_custom_entities()
sample_recognize_custom_entities()
Loading