Skip to content

Commit

Permalink
[textanalytics] updating custom samples (#21434)
Browse files Browse the repository at this point in the history
* updating samples to use given training data

* updates
  • Loading branch information
kristapratico authored Oct 27, 2021
1 parent 163938d commit fb41130
Show file tree
Hide file tree
Showing 8 changed files with 128 additions and 124 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FILE: sample_multi_category_classify_async.py
DESCRIPTION:
This sample demonstrates how to classify documents into multiple custom categories. Here we have a few
movie plot summaries that must be categorized into movie genres like Sci-Fi, Horror, Comedy, Romance, etc.
This sample demonstrates how to classify documents into multiple custom categories. For example,
movie plot summaries can be categorized into multiple movie genres like Sci-Fi and Horror, or Comedy and Romance, etc.
Classifying documents is available as an action type through the begin_analyze_actions API.
To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities
Expand Down Expand Up @@ -40,27 +40,26 @@ async def sample_classify_document_multi_categories_async():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["MULTI_CATEGORY_CLASSIFY_PROJECT_NAME"]
deployed_model_name = os.environ["MULTI_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"..",
"./text_samples/custom_classify_sample.txt",
)
)

with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

documents = [
"In the not-too-distant future, Earth's dying sun spells the end for humanity. In a last-ditch effort to "
"save the planet, a crew of eight men and women ventures into space with a device that could revive the "
"star. However, an accident, a grave mistake and a distress beacon from a long-lost spaceship throw "
"the crew and its desperate mission into a tailspin.",

"Despite his family's generations-old ban on music, young Miguel dreams of becoming an accomplished "
"musician like his idol Ernesto de la Cruz. Desperate to prove his talent, Miguel finds himself "
"in the stunning and colorful Land of the Dead. After meeting a charming trickster named Héctor, "
"the two new friends embark on an extraordinary journey to unlock the real story behind Miguel's "
"family history"
]
async with text_analytics_client:
poller = await text_analytics_client.begin_analyze_actions(
documents,
document,
actions=[
MultiCategoryClassifyAction(
project_name=project_name,
Expand All @@ -74,11 +73,11 @@ async def sample_classify_document_multi_categories_async():
document_results = []
async for page in pages:
document_results.append(page)
for doc, classification_results in zip(documents, document_results):
for doc, classification_results in zip(document, document_results):
for classification_result in classification_results:
if not classification_result.is_error:
classifications = classification_result.classifications
print("The movie plot '{}' was classified as the following genres:\n".format(doc))
print("\nThe movie plot '{}' was classified as the following genres:\n".format(doc))
for classification in classifications:
print("'{}' with confidence score {}.".format(
classification.category, classification.confidence_score
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,19 +39,22 @@ async def sample_recognize_custom_entities_async():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"]
deployed_model_name = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"..",
"./text_samples/custom_entities_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

document = [
"The Grantor(s), John Smith, who also appears of record as John A. Smith, for and in consideration of "
"Ten dollars and Zero cents ($10.00) and other good and valuable consideration in hand paid, conveys, and "
"warrants to Jane Doe, the following described real estate, situated in the County of King, State of "
"Washington: Lot A, King County Short Plat Number AAAAAAAA, recorded under Recording Number AAAAAAAAA in "
"King County, Washington."
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

async with text_analytics_client:
poller = await text_analytics_client.begin_analyze_actions(
Expand All @@ -70,34 +73,16 @@ async def sample_recognize_custom_entities_async():
custom_entities_result = result[0] # first document, first result
if not custom_entities_result.is_error:
for entity in custom_entities_result.entities:
if entity.category == "Seller Name":
print("The seller of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Name":
print("The buyer of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Fee":
print("The buyer fee is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Lot Number":
print("The lot number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Short Plat Number":
print("The short plat number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Recording Number":
print("The recording number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
print(
"Entity '{}' has category '{}' with confidence score of '{}'".format(
entity.text, entity.category, entity.confidence_score
)
)
else:
print("...Is an error with code '{}' and message '{}'".format(
custom_entities_result.code, custom_entities_result.message
))
)
)


async def main():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FILE: sample_single_category_classify_async.py
DESCRIPTION:
This sample demonstrates how to classify documents into a single custom category. Here we have several
support tickets that need to be classified as internet, printer, email or hardware issues.
This sample demonstrates how to classify documents into a single custom category. For example,
movie plot summaries can be categorized into a single movie genre like Sci-Fi, Horror, Comedy, Romance, etc.
Classifying documents is available as an action type through the begin_analyze_actions API.
To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities
Expand Down Expand Up @@ -40,22 +40,26 @@ async def sample_classify_document_single_category_async():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["SINGLE_CATEGORY_CLASSIFY_PROJECT_NAME"]
deployed_model_name = os.environ["SINGLE_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"..",
"./text_samples/custom_classify_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

documents = [
"My internet has stopped working. I tried resetting the router, but it just keeps blinking red.",
"I submitted 3 jobs to print but the printer is unresponsive. I can't see it under my devices either.",
"My computer will not boot. Pushing the power button does nothing - just a black screen.",
"I seem to not be receiving all my emails on time. Emails from 2 days ago show up as just received.",
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

async with text_analytics_client:
poller = await text_analytics_client.begin_analyze_actions(
documents,
document,
actions=[
SingleCategoryClassifyAction(
project_name=project_name,
Expand All @@ -70,7 +74,7 @@ async def sample_classify_document_single_category_async():
async for page in pages:
document_results.append(page)

for doc, classification_results in zip(documents, document_results):
for doc, classification_results in zip(document, document_results):
for classification_result in classification_results:
if not classification_result.is_error:
classification = classification_result.classification
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
FILE: sample_multi_category_classify.py
DESCRIPTION:
This sample demonstrates how to classify documents into multiple custom categories. Here we have a few
movie plot summaries that must be categorized into movie genres like Sci-Fi, Horror, Comedy, Romance, etc.
This sample demonstrates how to classify documents into multiple custom categories. For example,
movie plot summaries can be categorized into multiple movie genres like Sci-Fi and Horror, or Comedy and Romance, etc.
Classifying documents is available as an action type through the begin_analyze_actions API.
To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities
Expand Down Expand Up @@ -41,27 +41,24 @@ def sample_classify_document_multi_categories():
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["MULTI_CATEGORY_CLASSIFY_PROJECT_NAME"]
deployed_model_name = os.environ["MULTI_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"./text_samples/custom_classify_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

documents = [
"In the not-too-distant future, Earth's dying sun spells the end for humanity. In a last-ditch effort to "
"save the planet, a crew of eight men and women ventures into space with a device that could revive the "
"star. However, an accident, a grave mistake and a distress beacon from a long-lost spaceship throw "
"the crew and its desperate mission into a tailspin.",

"Despite his family's generations-old ban on music, young Miguel dreams of becoming an accomplished "
"musician like his idol Ernesto de la Cruz. Desperate to prove his talent, Miguel finds himself "
"in the stunning and colorful Land of the Dead. After meeting a charming trickster named Héctor, "
"the two new friends embark on an extraordinary journey to unlock the real story behind Miguel's "
"family history"
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

poller = text_analytics_client.begin_analyze_actions(
documents,
document,
actions=[
MultiCategoryClassifyAction(
project_name=project_name,
Expand All @@ -71,11 +68,11 @@ def sample_classify_document_multi_categories():
)

document_results = poller.result()
for doc, classification_results in zip(documents, document_results):
for doc, classification_results in zip(document, document_results):
for classification_result in classification_results:
if not classification_result.is_error:
classifications = classification_result.classifications
print("The movie plot '{}' was classified as the following genres:\n".format(doc))
print("\nThe movie plot '{}' was classified as the following genres:\n".format(doc))
for classification in classifications:
print("'{}' with confidence score {}.".format(
classification.category, classification.confidence_score
Expand All @@ -87,4 +84,4 @@ def sample_classify_document_multi_categories():


if __name__ == "__main__":
sample_classify_document_multi_categories()
sample_classify_document_multi_categories()
Original file line number Diff line number Diff line change
Expand Up @@ -33,33 +33,34 @@ def sample_recognize_custom_entities():
from azure.core.credentials import AzureKeyCredential
from azure.ai.textanalytics import (
TextAnalyticsClient,
RecognizeCustomEntitiesAction
RecognizeCustomEntitiesAction,
)

endpoint = os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"]
key = os.environ["AZURE_TEXT_ANALYTICS_KEY"]
project_name = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"]
deployed_model_name = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"]
path_to_sample_document = os.path.abspath(
os.path.join(
os.path.abspath(__file__),
"..",
"./text_samples/custom_entities_sample.txt",
)
)

text_analytics_client = TextAnalyticsClient(
endpoint=endpoint,
credential=AzureKeyCredential(key),
)

document = [
"The Grantor(s), John Smith, who also appears of record as John A. Smith, for and in consideration of "
"Ten dollars and Zero cents ($10.00) and other good and valuable consideration in hand paid, conveys, and "
"warrants to Jane Doe, the following described real estate, situated in the County of King, State of "
"Washington: Lot A, King County Short Plat Number AAAAAAAA, recorded under Recording Number AAAAAAAAA in "
"King County, Washington."
]
with open(path_to_sample_document, "r") as fd:
document = [fd.read()]

poller = text_analytics_client.begin_analyze_actions(
document,
actions=[
RecognizeCustomEntitiesAction(
project_name=project_name,
deployment_name=deployed_model_name
project_name=project_name, deployment_name=deployed_model_name
),
],
)
Expand All @@ -69,35 +70,18 @@ def sample_recognize_custom_entities():
custom_entities_result = result[0] # first document, first result
if not custom_entities_result.is_error:
for entity in custom_entities_result.entities:
if entity.category == "Seller Name":
print("The seller of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Name":
print("The buyer of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Buyer Fee":
print("The buyer fee is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Lot Number":
print("The lot number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Short Plat Number":
print("The short plat number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
)
if entity.category == "Recording Number":
print("The recording number of the property is {} with confidence score {}.".format(
entity.text, entity.confidence_score)
print(
"Entity '{}' has category '{}' with confidence score of '{}'".format(
entity.text, entity.category, entity.confidence_score
)
)
else:
print("...Is an error with code '{}' and message '{}'".format(
custom_entities_result.code, custom_entities_result.message
))
print(
"...Is an error with code '{}' and message '{}'".format(
custom_entities_result.code, custom_entities_result.message
)
)


if __name__ == "__main__":
sample_recognize_custom_entities()
sample_recognize_custom_entities()
Loading

0 comments on commit fb41130

Please sign in to comment.