From fb41130e7d6ee73af7266d7e2518a950d6752a63 Mon Sep 17 00:00:00 2001 From: Krista Pratico Date: Wed, 27 Oct 2021 14:04:53 -0700 Subject: [PATCH] [textanalytics] updating custom samples (#21434) * updating samples to use given training data * updates --- .../sample_multi_category_classify_async.py | 33 +++++------ .../sample_recognize_custom_entities_async.py | 47 +++++---------- .../sample_single_category_classify_async.py | 24 ++++---- .../samples/sample_multi_category_classify.py | 33 +++++------ .../sample_recognize_custom_entities.py | 58 +++++++------------ .../sample_single_category_classify.py | 25 ++++---- .../text_samples/custom_classify_sample.txt | 1 + .../text_samples/custom_entities_sample.txt | 31 ++++++++++ 8 files changed, 128 insertions(+), 124 deletions(-) create mode 100644 sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_classify_sample.txt create mode 100644 sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_entities_sample.txt diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_multi_category_classify_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_multi_category_classify_async.py index 32d61e12aaac..44f8943c17af 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_multi_category_classify_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_multi_category_classify_async.py @@ -10,8 +10,8 @@ FILE: sample_multi_category_classify_async.py DESCRIPTION: - This sample demonstrates how to classify documents into multiple custom categories. Here we have a few - movie plot summaries that must be categorized into movie genres like Sci-Fi, Horror, Comedy, Romance, etc. + This sample demonstrates how to classify documents into multiple custom categories. For example, + movie plot summaries can be categorized into multiple movie genres like Sci-Fi and Horror, or Comedy and Romance, etc. 
Classifying documents is available as an action type through the begin_analyze_actions API. To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities @@ -40,27 +40,26 @@ async def sample_classify_document_multi_categories_async(): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] project_name = os.environ["MULTI_CATEGORY_CLASSIFY_PROJECT_NAME"] deployed_model_name = os.environ["MULTI_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"] + path_to_sample_document = os.path.abspath( + os.path.join( + os.path.abspath(__file__), + "..", + "..", + "./text_samples/custom_classify_sample.txt", + ) + ) + + with open(path_to_sample_document, "r") as fd: + document = [fd.read()] text_analytics_client = TextAnalyticsClient( endpoint=endpoint, credential=AzureKeyCredential(key), ) - documents = [ - "In the not-too-distant future, Earth's dying sun spells the end for humanity. In a last-ditch effort to " - "save the planet, a crew of eight men and women ventures into space with a device that could revive the " - "star. However, an accident, a grave mistake and a distress beacon from a long-lost spaceship throw " - "the crew and its desperate mission into a tailspin.", - - "Despite his family's generations-old ban on music, young Miguel dreams of becoming an accomplished " - "musician like his idol Ernesto de la Cruz. Desperate to prove his talent, Miguel finds himself " - "in the stunning and colorful Land of the Dead. 
After meeting a charming trickster named Héctor, " - "the two new friends embark on an extraordinary journey to unlock the real story behind Miguel's " - "family history" - ] async with text_analytics_client: poller = await text_analytics_client.begin_analyze_actions( - documents, + document, actions=[ MultiCategoryClassifyAction( project_name=project_name, @@ -74,11 +73,11 @@ async def sample_classify_document_multi_categories_async(): document_results = [] async for page in pages: document_results.append(page) - for doc, classification_results in zip(documents, document_results): + for doc, classification_results in zip(document, document_results): for classification_result in classification_results: if not classification_result.is_error: classifications = classification_result.classifications - print("The movie plot '{}' was classified as the following genres:\n".format(doc)) + print("\nThe movie plot '{}' was classified as the following genres:\n".format(doc)) for classification in classifications: print("'{}' with confidence score {}.".format( classification.category, classification.confidence_score diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_custom_entities_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_custom_entities_async.py index b46a42c61301..d930663d4276 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_custom_entities_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_recognize_custom_entities_async.py @@ -39,19 +39,22 @@ async def sample_recognize_custom_entities_async(): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] project_name = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"] deployed_model_name = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"] + path_to_sample_document = os.path.abspath( + os.path.join( + os.path.abspath(__file__), + "..", + "..", + 
"./text_samples/custom_entities_sample.txt", + ) + ) text_analytics_client = TextAnalyticsClient( endpoint=endpoint, credential=AzureKeyCredential(key), ) - document = [ - "The Grantor(s), John Smith, who also appears of record as John A. Smith, for and in consideration of " - "Ten dollars and Zero cents ($10.00) and other good and valuable consideration in hand paid, conveys, and " - "warrants to Jane Doe, the following described real estate, situated in the County of King, State of " - "Washington: Lot A, King County Short Plat Number AAAAAAAA, recorded under Recording Number AAAAAAAAA in " - "King County, Washington." - ] + with open(path_to_sample_document, "r") as fd: + document = [fd.read()] async with text_analytics_client: poller = await text_analytics_client.begin_analyze_actions( @@ -70,34 +73,16 @@ async def sample_recognize_custom_entities_async(): custom_entities_result = result[0] # first document, first result if not custom_entities_result.is_error: for entity in custom_entities_result.entities: - if entity.category == "Seller Name": - print("The seller of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Buyer Name": - print("The buyer of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Buyer Fee": - print("The buyer fee is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Lot Number": - print("The lot number of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Short Plat Number": - print("The short plat number of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Recording Number": - print("The recording number of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) 
+ print( + "Entity '{}' has category '{}' with confidence score of '{}'".format( + entity.text, entity.category, entity.confidence_score ) + ) else: print("...Is an error with code '{}' and message '{}'".format( custom_entities_result.code, custom_entities_result.message - )) + ) + ) async def main(): diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_single_category_classify_async.py b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_single_category_classify_async.py index e516caeef9d1..e55f38d3b54f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_single_category_classify_async.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/async_samples/sample_single_category_classify_async.py @@ -10,8 +10,8 @@ FILE: sample_single_category_classify_async.py DESCRIPTION: - This sample demonstrates how to classify documents into a single custom category. Here we several - support tickets that need to be classified as internet, printer, email or hardware issues. + This sample demonstrates how to classify documents into a single custom category. For example, + movie plot summaries can be categorized into a single movie genre like Sci-Fi, Horror, Comedy, Romance, etc. Classifying documents is available as an action type through the begin_analyze_actions API. 
To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities @@ -40,22 +40,26 @@ async def sample_classify_document_single_category_async(): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] project_name = os.environ["SINGLE_CATEGORY_CLASSIFY_PROJECT_NAME"] deployed_model_name = os.environ["SINGLE_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"] + path_to_sample_document = os.path.abspath( + os.path.join( + os.path.abspath(__file__), + "..", + "..", + "./text_samples/custom_classify_sample.txt", + ) + ) text_analytics_client = TextAnalyticsClient( endpoint=endpoint, credential=AzureKeyCredential(key), ) - documents = [ - "My internet has stopped working. I tried resetting the router, but it just keeps blinking red.", - "I submitted 3 jobs to print but the printer is unresponsive. I can't see it under my devices either.", - "My computer will not boot. Pushing the power button does nothing - just a black screen.", - "I seem to not be receiving all my emails on time. 
Emails from 2 days ago show up as just received.", - ] + with open(path_to_sample_document, "r") as fd: + document = [fd.read()] async with text_analytics_client: poller = await text_analytics_client.begin_analyze_actions( - documents, + document, actions=[ SingleCategoryClassifyAction( project_name=project_name, @@ -70,7 +74,7 @@ async def sample_classify_document_single_category_async(): async for page in pages: document_results.append(page) - for doc, classification_results in zip(documents, document_results): + for doc, classification_results in zip(document, document_results): for classification_result in classification_results: if not classification_result.is_error: classification = classification_result.classification diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_multi_category_classify.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_multi_category_classify.py index c69ec1ec685d..3f191c7eda83 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_multi_category_classify.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_multi_category_classify.py @@ -10,8 +10,8 @@ FILE: sample_multi_category_classify.py DESCRIPTION: - This sample demonstrates how to classify documents into multiple custom categories. Here we have a few - movie plot summaries that must be categorized into movie genres like Sci-Fi, Horror, Comedy, Romance, etc. + This sample demonstrates how to classify documents into multiple custom categories. For example, + movie plot summaries can be categorized into multiple movie genres like Sci-Fi and Horror, or Comedy and Romance, etc. Classifying documents is available as an action type through the begin_analyze_actions API. 
To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities @@ -41,27 +41,24 @@ def sample_classify_document_multi_categories(): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] project_name = os.environ["MULTI_CATEGORY_CLASSIFY_PROJECT_NAME"] deployed_model_name = os.environ["MULTI_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"] + path_to_sample_document = os.path.abspath( + os.path.join( + os.path.abspath(__file__), + "..", + "./text_samples/custom_classify_sample.txt", + ) + ) text_analytics_client = TextAnalyticsClient( endpoint=endpoint, credential=AzureKeyCredential(key), ) - documents = [ - "In the not-too-distant future, Earth's dying sun spells the end for humanity. In a last-ditch effort to " - "save the planet, a crew of eight men and women ventures into space with a device that could revive the " - "star. However, an accident, a grave mistake and a distress beacon from a long-lost spaceship throw " - "the crew and its desperate mission into a tailspin.", - - "Despite his family's generations-old ban on music, young Miguel dreams of becoming an accomplished " - "musician like his idol Ernesto de la Cruz. Desperate to prove his talent, Miguel finds himself " - "in the stunning and colorful Land of the Dead. 
After meeting a charming trickster named Héctor, " - "the two new friends embark on an extraordinary journey to unlock the real story behind Miguel's " - "family history" - ] + with open(path_to_sample_document, "r") as fd: + document = [fd.read()] poller = text_analytics_client.begin_analyze_actions( - documents, + document, actions=[ MultiCategoryClassifyAction( project_name=project_name, @@ -71,11 +68,11 @@ def sample_classify_document_multi_categories(): ) document_results = poller.result() - for doc, classification_results in zip(documents, document_results): + for doc, classification_results in zip(document, document_results): for classification_result in classification_results: if not classification_result.is_error: classifications = classification_result.classifications - print("The movie plot '{}' was classified as the following genres:\n".format(doc)) + print("\nThe movie plot '{}' was classified as the following genres:\n".format(doc)) for classification in classifications: print("'{}' with confidence score {}.".format( classification.category, classification.confidence_score @@ -87,4 +84,4 @@ def sample_classify_document_multi_categories(): if __name__ == "__main__": - sample_classify_document_multi_categories() + sample_classify_document_multi_categories() \ No newline at end of file diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_custom_entities.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_custom_entities.py index 795c6e16b62a..e23f6b1ebc3f 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_custom_entities.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_recognize_custom_entities.py @@ -33,33 +33,34 @@ def sample_recognize_custom_entities(): from azure.core.credentials import AzureKeyCredential from azure.ai.textanalytics import ( TextAnalyticsClient, - RecognizeCustomEntitiesAction + RecognizeCustomEntitiesAction, ) endpoint = 
os.environ["AZURE_TEXT_ANALYTICS_ENDPOINT"] key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] project_name = os.environ["CUSTOM_ENTITIES_PROJECT_NAME"] deployed_model_name = os.environ["CUSTOM_ENTITIES_DEPLOYMENT_NAME"] + path_to_sample_document = os.path.abspath( + os.path.join( + os.path.abspath(__file__), + "..", + "./text_samples/custom_entities_sample.txt", + ) + ) text_analytics_client = TextAnalyticsClient( endpoint=endpoint, credential=AzureKeyCredential(key), ) - document = [ - "The Grantor(s), John Smith, who also appears of record as John A. Smith, for and in consideration of " - "Ten dollars and Zero cents ($10.00) and other good and valuable consideration in hand paid, conveys, and " - "warrants to Jane Doe, the following described real estate, situated in the County of King, State of " - "Washington: Lot A, King County Short Plat Number AAAAAAAA, recorded under Recording Number AAAAAAAAA in " - "King County, Washington." - ] + with open(path_to_sample_document, "r") as fd: + document = [fd.read()] poller = text_analytics_client.begin_analyze_actions( document, actions=[ RecognizeCustomEntitiesAction( - project_name=project_name, - deployment_name=deployed_model_name + project_name=project_name, deployment_name=deployed_model_name ), ], ) @@ -69,35 +70,18 @@ def sample_recognize_custom_entities(): custom_entities_result = result[0] # first document, first result if not custom_entities_result.is_error: for entity in custom_entities_result.entities: - if entity.category == "Seller Name": - print("The seller of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Buyer Name": - print("The buyer of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Buyer Fee": - print("The buyer fee is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Lot Number": - print("The lot 
number of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Short Plat Number": - print("The short plat number of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) - ) - if entity.category == "Recording Number": - print("The recording number of the property is {} with confidence score {}.".format( - entity.text, entity.confidence_score) + print( + "Entity '{}' has category '{}' with confidence score of '{}'".format( + entity.text, entity.category, entity.confidence_score ) + ) else: - print("...Is an error with code '{}' and message '{}'".format( - custom_entities_result.code, custom_entities_result.message - )) + print( + "...Is an error with code '{}' and message '{}'".format( + custom_entities_result.code, custom_entities_result.message + ) + ) if __name__ == "__main__": - sample_recognize_custom_entities() + sample_recognize_custom_entities() \ No newline at end of file diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_single_category_classify.py b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_single_category_classify.py index 1baeaaf2acd7..cc4d62b1b495 100644 --- a/sdk/textanalytics/azure-ai-textanalytics/samples/sample_single_category_classify.py +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/sample_single_category_classify.py @@ -10,8 +10,8 @@ FILE: sample_single_category_classify.py DESCRIPTION: - This sample demonstrates how to classify documents into a single custom category. Here we several - support tickets that need to be classified as internet, printer, email or hardware issues. + This sample demonstrates how to classify documents into a single custom category. For example, + movie plot summaries can be categorized into a single movie genre like Sci-Fi, Horror, Comedy, Romance, etc. Classifying documents is available as an action type through the begin_analyze_actions API. 
To train a model to classify your documents, see https://aka.ms/azsdk/textanalytics/customfunctionalities @@ -41,21 +41,24 @@ def sample_classify_document_single_category(): key = os.environ["AZURE_TEXT_ANALYTICS_KEY"] project_name = os.environ["SINGLE_CATEGORY_CLASSIFY_PROJECT_NAME"] deployed_model_name = os.environ["SINGLE_CATEGORY_CLASSIFY_DEPLOYMENT_NAME"] + path_to_sample_document = os.path.abspath( + os.path.join( + os.path.abspath(__file__), + "..", + "./text_samples/custom_classify_sample.txt", + ) + ) text_analytics_client = TextAnalyticsClient( endpoint=endpoint, credential=AzureKeyCredential(key), ) - documents = [ - "My internet has stopped working. I tried resetting the router, but it just keeps blinking red.", - "I submitted 3 jobs to print but the printer is unresponsive. I can't see it under my devices either.", - "My computer will not boot. Pushing the power button does nothing - just a black screen.", - "I seem to not be receiving all my emails on time. Emails from 2 days ago show up as just received.", - ] + with open(path_to_sample_document, "r") as fd: + document = [fd.read()] poller = text_analytics_client.begin_analyze_actions( - documents, + document, actions=[ SingleCategoryClassifyAction( project_name=project_name, @@ -65,7 +68,7 @@ def sample_classify_document_single_category(): ) document_results = poller.result() - for doc, classification_results in zip(documents, document_results): + for doc, classification_results in zip(document, document_results): for classification_result in classification_results: if not classification_result.is_error: classification = classification_result.classification @@ -79,4 +82,4 @@ def sample_classify_document_single_category(): if __name__ == "__main__": - sample_classify_document_single_category() + sample_classify_document_single_category() \ No newline at end of file diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_classify_sample.txt 
b/sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_classify_sample.txt new file mode 100644 index 000000000000..a2b0b87c6dde --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_classify_sample.txt @@ -0,0 +1 @@ +14-year-old Danny O'Neil is madly in love with his teacher, Miss Peggy Noble . Given the fact that she is engaged to the cold-tempered and vicious gym teacher, Roy Kilton , who is nicknamed Jackjaw for his constant threat of breaking his pupils' jaws, Danny goes through his school days somewhat uninspired and suffers in silence. One day, he oversees his geeky friend Lloyd Duffy , who happens to live next door with his uncle Herb and aunt May , growing fruits with an experimental growth accelerator. Danny becomes enthusiastic of turning himself into a grown man with the same machine, in order to break up Miss Noble's engagement, as well as convincing her to give him a chance. Lloyd is reluctant to help him out, aware of everything that could go wrong, so Danny secretly uses the machine at night. He saw that the timing was perfect, considering that his parents will leave the house for a week. Unaware of the consequences, he turns into a 30-year-old man . The next day, Lloyd immediately starts working on a machine with the opposite effect so Danny will be able to return to his 14-year-old body soon. During this process, Danny visits the high school to pursue Miss Noble. When he arrives, he is mistaken as the school newest principal, Harold Forndexter. He not only impresses his assistant Louisa Horton with the introduction of his new rules - which include having as much fun as possible - but also Peggy, who admires his youthful approach of life. Much to the dismay of Kilton, she agrees to go on a date with 'Harold'. Even though Kilton follows their every step, Peggy has a splendid evening with the new principal, and they almost kiss at the end of the night. 
During a school dance, 'Harold' convinces Peggy that Kilton is not right for her, and she breaks off the engagement. Immediately after, 'Harold' and Peggy become a couple. Kilton, refusing to accept this, tries to find out more on Forndexter, and finds out that 'Harold' is an impostor, as the real Forndexter looks quite differently. He immediately warns the police, who arrive quickly to arrest him. While 'Harold' tries to escape, Lloyd informs him that his machine is complete. After getting rid of the cops, 'Harold' tells Peggy that he has to leave town for good and then becomes 14 again. Peggy witnesses this transformation, and realizes that 'Harold' was actually Danny, one of her favorite students. Because she has fallen in love with him, she uses the same machine to turn herself into a 14-year-old , which enables her to be with Danny. Meanwhile, Lloyd turns himself into an old professor, Mr. Lloyd , and starts working at the same high school as a teacher. \ No newline at end of file diff --git a/sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_entities_sample.txt b/sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_entities_sample.txt new file mode 100644 index 000000000000..840a7ed54405 --- /dev/null +++ b/sdk/textanalytics/azure-ai-textanalytics/samples/text_samples/custom_entities_sample.txt @@ -0,0 +1,31 @@ +Date 6/29/2018 + +This is a Loan agreement between the two individuals mentioned below in the parties section of the agreement. + +I. Parties of agreement + +- Parker McLean with a mailing address of 9876 Fusce Rd, City of Frederick, State of Nebraska, (the "Borrower") +- Dylan Williams with a mailing address of 6789 May Street, City of Winchester, State of Kentucky (the "Lender") + +II. Amount +The loan amount given by lender to borrower is six hundred twenty-three thousand four hundred sixty-three Dollars ($623,463.00) ("The Note") + +III. 
Interest +The Note shall bear interest of five percent (5%) compounded annually. + +IV. Payment +The amount mentioned in this agreement (the "Note"), including the principal and any accrued interest, is + +V. Payment Terms +Any delay in payment is subject to a fine with a flat amount of $50 for every week the payment is delayed. +All payments made by the Borrower shall go into settling the accrued interest and any late fees and then into the payment of the principal amount. + +VI. Prepayment +The borrower is able to pay back the Note in full at any time, thus terminating this agreement. +The borrower also can make additional payments at any time and this will be deducted from the amount of the latest installments. + +VII. Acceleration. +In case of Borrower's failure to pay any part of the principal or interest as and when due under this Note; or Borrower's becoming insolvent or not paying its debts as they become due, the lender has the right to declare an "Event of Acceleration" in which case the Lender has the right to declare this Note immediately due and payable + +IX. Succession +This Note shall outlive the borrower and/or the lender in the event of their death. This note shall be binding on any of their successors. \ No newline at end of file