Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] Analyze updates for v5.1.0b6 #17003

Merged
14 commits merged into from
Mar 5, 2021
2 changes: 1 addition & 1 deletion sdk/textanalytics/azure-ai-textanalytics/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
- Renamed classes `AspectSentiment` and `OpinionSentiment` to `TargetSentiment` and `AssessmentSentiment` respectively.

**New Features**

- Added `RecognizeLinkedEntitiesAction` as a supported action type for `begin_analyze_batch_actions`.
- Added parameter `categories_filter` to the `recognize_pii_entities` client method.
- Added enum `PiiEntityCategoryType`.
- Add property `normalized_text` to `HealthcareEntity`. This property is a normalized version of the `text` property that already
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
HealthcareEntity,
HealthcareEntityDataSource,
RecognizeEntitiesAction,
RecognizeLinkedEntitiesAction,
RecognizePiiEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeBatchActionsResult,
Expand Down Expand Up @@ -82,6 +83,7 @@
'HealthcareEntity',
'HealthcareEntityDataSource',
'RecognizeEntitiesAction',
'RecognizeLinkedEntitiesAction',
'RecognizePiiEntitiesAction',
'ExtractKeyPhrasesAction',
'AnalyzeBatchActionsResult',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@
from azure.core.polling._async_poller import PollingReturnType


_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
_FAILED = frozenset(["failed"])
_SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
_SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])


class TextAnalyticsAsyncLROPollingMethod(AsyncLROBasePolling):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):

class AnalyzeResultAsync(AsyncItemPaged):
    """Async pageable of analyze action results, carrying optional request statistics."""

    def __init__(self, *args, **kwargs):
        # 'statistics' may be absent (stats are only returned when requested),
        # so pop with a None default to avoid a KeyError.
        self.statistics = kwargs.pop('statistics', None)
        super(AnalyzeResultAsync, self).__init__(*args, **kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from azure.core.polling import LROPoller
from azure.core.polling.base_polling import LROBasePolling, OperationResourcePolling, OperationFailed, BadStatus

_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallysucceeded"])
_FINISHED = frozenset(["succeeded", "cancelled", "failed", "partiallycompleted"])
_FAILED = frozenset(["failed"])
_SUCCEEDED = frozenset(["succeeded", "partiallysucceeded"])
_SUCCEEDED = frozenset(["succeeded", "partiallycompleted"])


class TextAnalyticsOperationResourcePolling(OperationResourcePolling):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,7 @@ class AnalyzeBatchActionsType(str, Enum):
RECOGNIZE_ENTITIES = "recognize_entities" #: Entities Recognition action.
RECOGNIZE_PII_ENTITIES = "recognize_pii_entities" #: PII Entities Recognition action.
EXTRACT_KEY_PHRASES = "extract_key_phrases" #: Key Phrase Extraction action.
RECOGNIZE_LINKED_ENTITIES = "recognize_linked_entities" #: Linked Entities Recognition action.


class AnalyzeBatchActionsResult(DictMixin):
Expand All @@ -1377,20 +1378,24 @@ class AnalyzeBatchActionsResult(DictMixin):
:vartype action_type: str or ~azure.ai.textanalytics.AnalyzeBatchActionsType
:ivar ~datetime.datetime completed_on: Date and time (UTC) when the result completed
on the service.
:ivar statistics: Overall statistics for the action result.
:vartype statistics: ~azure.ai.RequestStatistics
"""
def __init__(self, **kwargs):
self.document_results = kwargs.get("document_results")
self.is_error = False
self.action_type = kwargs.get("action_type")
self.completed_on = kwargs.get("completed_on")
self.statistics = kwargs.get("statistics")

def __repr__(self):
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={})" \
.format(
return "AnalyzeBatchActionsResult(document_results={}, is_error={}, action_type={}, completed_on={}, " \
"statistics={})".format(
repr(self.document_results),
self.is_error,
self.action_type,
self.completed_on
self.completed_on,
repr(self.statistics)
)[:1024]

class AnalyzeBatchActionsError(DictMixin):
Expand Down Expand Up @@ -1527,6 +1532,44 @@ def to_generated(self):
)
)


class RecognizeLinkedEntitiesAction(DictMixin):
    """RecognizeLinkedEntitiesAction encapsulates the parameters for starting a long-running Linked Entities
    Recognition operation.

    If you just want to recognize linked entities in a list of documents, and not perform a batch
    of long running actions on the input of documents, call method `recognize_linked_entities` instead
    of interfacing with this model.

    :keyword str model_version: The model version to use for the analysis. Defaults to "latest".
    :keyword str string_index_type: Specifies the method used to interpret string offsets.
        `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
        you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
        see https://aka.ms/text-analytics-offsets
    :ivar str model_version: The model version to use for the analysis.
    :ivar str string_index_type: Specifies the method used to interpret string offsets.
        `UnicodeCodePoint`, the Python encoding, is the default. To override the Python default,
        you can also pass in `Utf16CodePoint` or `TextElements_v8`. For additional information
        see https://aka.ms/text-analytics-offsets
    """

    def __init__(self, **kwargs):
        self.model_version = kwargs.get("model_version", "latest")
        self.string_index_type = kwargs.get("string_index_type", "UnicodeCodePoint")

    def __repr__(self):
        # Capped at 1024 chars to keep reprs bounded, matching the other action models.
        return "RecognizeLinkedEntitiesAction(model_version={}, string_index_type={})" \
            .format(self.model_version, self.string_index_type)[:1024]

    def to_generated(self):
        # Convert to the autorest-generated task model used in the service payload.
        return _latest_preview_models.EntityLinkingTask(
            parameters=_latest_preview_models.EntityLinkingTaskParameters(
                model_version=self.model_version,
                string_index_type=self.string_index_type
            )
        )


class RequestStatistics(DictMixin):
def __init__(self, **kwargs):
self.documents_count = kwargs.get("documents_count")
Expand All @@ -1544,8 +1587,8 @@ def _from_generated(cls, request_statistics):
)

def __repr__(self, **kwargs):
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, \
transactions_count={}".format(
return "RequestStatistics(documents_count={}, valid_documents_count={}, erroneous_documents_count={}, " \
"transactions_count={})".format(
self.documents_count,
self.valid_documents_count,
self.erroneous_documents_count,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ def __init__(self, *args, **kwargs):

class AnalyzeResult(ItemPaged):
    """Pageable of analyze action results, carrying optional request statistics."""

    def __init__(self, *args, **kwargs):
        # 'statistics' may be absent (stats are only returned when requested),
        # so pop with a None default to avoid a KeyError.
        self.statistics = kwargs.pop('statistics', None)
        super(AnalyzeResult, self).__init__(*args, **kwargs)
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
TextDocumentInput,
RecognizeEntitiesAction,
RecognizePiiEntitiesAction,
RecognizeLinkedEntitiesAction,
AnalyzeBatchActionsType,
)

Expand Down Expand Up @@ -72,6 +73,8 @@ def _determine_action_type(action):
return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
if isinstance(action, RecognizePiiEntitiesAction):
return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
if isinstance(action, RecognizeLinkedEntitiesAction):
return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES

def _check_string_index_type_arg(string_index_type_arg, api_version, string_index_type_default="UnicodeCodePoint"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
RequestStatistics,
AnalyzeBatchActionsType,
AnalyzeBatchActionsError,
TextDocumentBatchStatistics,
_get_indices,
)
from ._paging import AnalyzeHealthcareEntitiesResult, AnalyzeResult
Expand Down Expand Up @@ -204,27 +203,34 @@ def _get_deserialization_callback_from_task_type(task_type):
return entities_result
if task_type == AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES:
return pii_entities_result
if task_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
return linked_entities_result
return key_phrases_result

def _get_property_name_from_task_type(task_type):
    """Map an analyze action type to the attribute name that holds its task results.

    Falls back to the key-phrase-extraction attribute for any unrecognized type.
    """
    property_names = {
        AnalyzeBatchActionsType.RECOGNIZE_ENTITIES: "entity_recognition_tasks",
        AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES: "entity_recognition_pii_tasks",
        AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES: "entity_linking_tasks",
    }
    return property_names.get(task_type, "key_phrase_extraction_tasks")

def _num_tasks_in_current_page(returned_tasks_object):
return (
len(returned_tasks_object.entity_recognition_tasks or []) +
len(returned_tasks_object.entity_recognition_pii_tasks or []) +
len(returned_tasks_object.key_phrase_extraction_tasks or [])
len(returned_tasks_object.key_phrase_extraction_tasks or []) +
len(returned_tasks_object.entity_linking_tasks or [])
)

def _get_task_type_from_error(error):
    """Infer which action type a job-level error refers to from the error's
    target string (presumably a JSON pointer into the tasks payload — TODO
    confirm the exact format against the service).

    'pii' must be checked before 'entityrecognition' since the PII task
    target also contains the entity-recognition substring.
    """
    target = error.target.lower()  # hoisted: checked against several substrings
    if "pii" in target:
        return AnalyzeBatchActionsType.RECOGNIZE_PII_ENTITIES
    if "entityrecognition" in target:
        return AnalyzeBatchActionsType.RECOGNIZE_ENTITIES
    if "entitylinking" in target:
        return AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
    return AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES

def _get_mapped_errors(analyze_job_state):
Expand All @@ -249,6 +255,9 @@ def _get_good_result(current_task_type, index_of_task_result, doc_id_order, resp
)
return AnalyzeBatchActionsResult(
document_results=document_results,
statistics=RequestStatistics._from_generated( # pylint: disable=protected-access
response_task_to_deserialize.results.statistics
) if response_task_to_deserialize.results.statistics else None,
action_type=current_task_type,
completed_on=response_task_to_deserialize.last_update_date_time,
)
Expand Down Expand Up @@ -312,9 +321,7 @@ def healthcare_paged_result(doc_id_order, health_status_callback, _, obj, respon
def analyze_paged_result(doc_id_order, task_order, analyze_status_callback, _, obj, response_headers, show_stats=False):  # pylint: disable=unused-argument
    """Build the pageable AnalyzeResult for a long-running analyze operation.

    Statistics are no longer attached at the paged-result level; per-action
    statistics are surfaced on each AnalyzeBatchActionsResult instead.
    """
    return AnalyzeResult(
        functools.partial(lro_get_next_page, analyze_status_callback, obj, show_stats=show_stats),
        functools.partial(analyze_extract_page_data, doc_id_order, task_order, response_headers)
    )

def _get_deserialize():
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from urllib.parse import urlparse, parse_qsl

from azure.core.async_paging import AsyncList
from ._models import RequestStatistics, TextDocumentBatchStatistics
from ._models import RequestStatistics
from ._async_paging import (
AnalyzeHealthcareEntitiesResultAsync,
AnalyzeResultAsync
Expand Down Expand Up @@ -58,6 +58,4 @@ def analyze_paged_result(
return AnalyzeResultAsync(
functools.partial(lro_get_next_page_async, analyze_status_callback, obj),
functools.partial(analyze_extract_page_data_async, doc_id_order, task_order, response_headers),
statistics=TextDocumentBatchStatistics._from_generated(obj.statistics) \
if show_stats and obj.statistics is not None else None # pylint: disable=protected-access
)
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
RecognizePiiEntitiesResult,
RecognizeEntitiesAction,
RecognizePiiEntitiesAction,
RecognizeLinkedEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeHealthcareEntitiesResultItem,
AnalyzeBatchActionsResult,
Expand Down Expand Up @@ -743,7 +744,7 @@ def _analyze_result_callback(self, doc_id_order, task_order, raw_response, _, he
def begin_analyze_batch_actions( # type: ignore
self,
documents, # type: Union[List[str], List[TextDocumentInput], List[Dict[str, str]]]
actions, # type: List[Union[RecognizeEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]]
actions, # type: List[Union[RecognizeEntitiesAction, RecognizeLinkedEntitiesAction, RecognizePiiEntitiesAction, ExtractKeyPhrasesAction]] # pylint: disable=line-too-long
**kwargs # type: Any
): # type: (...) -> LROPoller[ItemPaged[AnalyzeBatchActionsResult]]
"""Start a long-running operation to perform a variety of text analysis actions over a batch of documents.
Expand All @@ -761,7 +762,8 @@ def begin_analyze_batch_actions( # type: ignore
The outputted action results will be in the same order you inputted your actions.
Duplicate actions in list not supported.
:type actions:
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
RecognizeLinkedEntitiesAction]
:keyword str display_name: An optional display name to set for the requested analysis.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
Expand Down Expand Up @@ -816,6 +818,13 @@ def begin_analyze_batch_actions( # type: ignore
key_phrase_extraction_tasks=[
t.to_generated() for t in
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
],
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you update the typehint on line 734?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated. I also had to disable line-too-long for pylint on this one.

entity_linking_tasks=[
t.to_generated() for t in
[
a for a in actions
if _determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
]
]
)
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -742,7 +742,8 @@ async def begin_analyze_batch_actions( # type: ignore
The outputted action results will be in the same order you inputted your actions.
Duplicate actions in list not supported.
:type actions:
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction]
list[RecognizeEntitiesAction or RecognizePiiEntitiesAction or ExtractKeyPhrasesAction or
RecognizeLinkedEntitiesAction]
:keyword str display_name: An optional display name to set for the requested analysis.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
Expand Down Expand Up @@ -797,6 +798,13 @@ async def begin_analyze_batch_actions( # type: ignore
key_phrase_extraction_tasks=[
t.to_generated() for t in
[a for a in actions if _determine_action_type(a) == AnalyzeBatchActionsType.EXTRACT_KEY_PHRASES]
],
entity_linking_tasks=[
t.to_generated() for t in
[
a for a in actions if \
_determine_action_type(a) == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES
]
]
)
analyze_body = self._client.models(api_version='v3.1-preview.4').AnalyzeBatchInput(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ async def analyze_async(self):
from azure.ai.textanalytics.aio import TextAnalyticsClient
from azure.ai.textanalytics import (
RecognizeEntitiesAction,
RecognizeLinkedEntitiesAction,
RecognizePiiEntitiesAction,
ExtractKeyPhrasesAction,
AnalyzeBatchActionsType
Expand Down Expand Up @@ -63,7 +64,8 @@ async def analyze_async(self):
actions=[
RecognizeEntitiesAction(),
RecognizePiiEntitiesAction(),
ExtractKeyPhrasesAction()
ExtractKeyPhrasesAction(),
RecognizeLinkedEntitiesAction()
]
)

Expand Down Expand Up @@ -104,6 +106,24 @@ async def analyze_async(self):
print("Key Phrases: {}\n".format(doc.key_phrases))
print("------------------------------------------")

if action_result.action_type == AnalyzeBatchActionsType.RECOGNIZE_LINKED_ENTITIES:
print("Results of Linked Entities Recognition action:")
for idx, doc in enumerate(action_result.document_results):
print("Document text: {}\n".format(documents[idx]))
for linked_entity in doc.entities:
print("Entity name: {}".format(linked_entity.name))
print("...Data source: {}".format(linked_entity.data_source))
print("...Data source language: {}".format(linked_entity.language))
print("...Data source entity ID: {}".format(linked_entity.data_source_entity_id))
print("...Data source URL: {}".format(linked_entity.url))
print("...Document matches:")
for match in linked_entity.matches:
print("......Match text: {}".format(match.text))
print(".........Confidence Score: {}".format(match.confidence_score))
print(".........Offset: {}".format(match.offset))
print(".........Length: {}".format(match.length))
print("------------------------------------------")

# [END analyze_async]


Expand Down
Loading