Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[text analytics] Updates to Healthcare design #16247

Merged
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c00140c
Renamed healthcare methods
Jan 14, 2021
e69151c
Exposing job metadata on the healthcare polling operation
Jan 14, 2021
d005b70
Aligning with other similar work for Analyze
Feb 2, 2021
ac9faf0
Fixed merge conflicts
Feb 2, 2021
078e875
Regenerated with custom pollers; updated tests and a few other files
Feb 2, 2021
c2a09ff
Renamed HealthcareEntityLink -> HealthcareEntityDataSource and also r…
Feb 3, 2021
fb37651
Fixed missed renames in __init__.py
Feb 3, 2021
f827c6b
Updated links and other things in the readme
Feb 3, 2021
864b1b2
Updated samples readme
Feb 3, 2021
2649001
Merge branch 'master' of https://github.com/Azure/azure-sdk-for-pytho…
Feb 3, 2021
eed2d38
Reverted an unintentional change in analyze_sentiment
Feb 3, 2021
429c4ea
Moved cancellation to the poller; fixed some linting issues
Feb 4, 2021
ef6c210
Removed an unused model
Feb 4, 2021
91ba6a1
PR comments and updated changelog
Feb 4, 2021
a78decd
Added related_entities; Fixed merge conflicts
Feb 5, 2021
7da7797
Fixed a few issues caused by merge conflicts; made HealthcareEntity h…
Feb 5, 2021
0a14704
Fixed a problem with cancellation; updated samples
Feb 5, 2021
d952ded
Merge conflicts
Feb 5, 2021
82bc4e4
PR comments
Feb 5, 2021
03d68a7
PR comments
Feb 5, 2021
00c00a5
Fixed bidirectional check; updated/added tests
Feb 5, 2021
f2dedef
added a test for async
Feb 5, 2021
bae8200
PR comments and fixing test issues
Feb 5, 2021
4563059
Fixed a linting issue
Feb 5, 2021
f9e5776
PR comments and fixed a test issue
Feb 5, 2021
3745666
Fixed cancellation issue
Feb 5, 2021
0971671
Another cancellation test fix
Feb 5, 2021
7771a70
PR comments
Feb 5, 2021
acc58ed
Async syntax issue
Feb 5, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from azure.core.async_paging import AsyncItemPaged


class AnalyzeHealthcareResultAsync(AsyncItemPaged):
class AnalyzeHealthcareEntitiesResultAsync(AsyncItemPaged):
    """AsyncItemPaged subclass that additionally exposes ``model_version`` and ``statistics``.

    :keyword model_version: Required. Removed from ``kwargs`` and stored on the instance.
    :keyword statistics: Required. Removed from ``kwargs`` and stored on the instance.
    :raises KeyError: If ``model_version`` or ``statistics`` is not supplied.
    """

    def __init__(self, *args, **kwargs):
        # Strip the keywords owned by this class before delegating; every
        # remaining argument is forwarded unchanged to AsyncItemPaged.
        self.model_version = kwargs.pop('model_version')
        self.statistics = kwargs.pop('statistics')
        # The explicit two-argument super() must name this exact class;
        # any other spelling is an undefined name and fails at call time.
        super(AnalyzeHealthcareEntitiesResultAsync, self).__init__(*args, **kwargs)


class AnalyzeResultAsync(AsyncItemPaged):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
# ------------------------------------
import re
from enum import Enum
from six.moves.urllib.parse import urlparse
from ._generated.models import (
LanguageInput,
MultiLanguageInput,
Expand Down Expand Up @@ -182,9 +183,37 @@ def __repr__(self):
)[:1024]


class AnalyzeHealthcareResultItem(DictMixin):
class AnalyzeHealthcareEntitiesOperation(DictMixin):
    """Wraps the poller of a healthcare entities operation and mirrors its job metadata.

    The metadata attributes are copied from ``poller.resource()`` each time
    :meth:`update_status` runs, which happens once during construction and
    again after :meth:`get_result` obtains the result.

    :keyword poller: The poller driving the operation. It must provide
        ``resource()``, ``update_status()``, ``result()`` and
        ``get_continuation_token()``.
    :ivar id: ``job_id`` of the resource reported by the poller at construction.
    :ivar created_date_time: ``created_date_time`` of the latest job metadata.
    :ivar expiration_date_time: ``expiration_date_time`` of the latest job metadata.
    :ivar last_update_date_time: ``last_update_date_time`` of the latest job metadata.
    :ivar status: ``status`` of the latest job metadata.
    :ivar continuation_token: Token returned by ``poller.get_continuation_token()``
        at construction.
    """

    def __init__(self, **kwargs):
        self._poller = kwargs.get("poller", None)
        self._initial_response = self._poller._polling_method._initial_response  # pylint: disable=protected-access
        self.id = self._poller.resource().job_id
        # Placeholders so the attributes exist before the first refresh below.
        self.created_date_time = None
        self.expiration_date_time = None
        self.last_update_date_time = None
        self.status = None
        self.continuation_token = self._poller.get_continuation_token()
        self.update_status()

    def get_result(self):
        """Return ``poller.result()`` and refresh the mirrored metadata afterwards.

        :return: Whatever the wrapped poller's ``result()`` returns.
        """
        result = self._poller.result()
        # Refresh after the result is available so the attributes reflect
        # the state reported once result() has returned.
        self.update_status()
        return result

    def update_status(self):
        """Ask the poller to update its status and copy the job metadata onto this object."""
        self._poller.update_status()
        job_metadata = self._poller.resource()
        self.created_date_time = job_metadata.created_date_time
        self.expiration_date_time = job_metadata.expiration_date_time
        self.last_update_date_time = job_metadata.last_update_date_time
        self.status = job_metadata.status


class AnalyzeHealthcareEntitiesResultItem(DictMixin):
"""
AnalyzeHealthcareEntitiesResultItem contains the Healthcare entities and relations from a
particular document.

:ivar str id: Unique, non-empty document identifier that matches the
Expand All @@ -204,7 +233,7 @@ class AnalyzeHealthcareResultItem(DictMixin):
:vartype statistics:
~azure.ai.textanalytics.TextDocumentStatistics
:ivar bool is_error: Boolean check for error item when iterating over list of
results. Always False for an instance of a AnalyzeHealthcareResult.
results. Always False for an instance of a AnalyzeHealthcareEntitiesResultItem.
"""

def __init__(self, **kwargs):
Expand Down Expand Up @@ -234,7 +263,7 @@ def _from_generated(cls, healthcare_result):
)

def __repr__(self):
return "AnalyzeHealthcareResultItem(id={}, entities={}, relations={}, warnings={}, statistics={}, \
return "AnalyzeHealthcareEntitiesResultItem(id={}, entities={}, relations={}, warnings={}, statistics={}, \
is_error={})".format(
self.id,
self.entities,
Expand Down Expand Up @@ -1286,6 +1315,41 @@ def __repr__(self, **kwargs):
.format(self.name, repr(self.results))[:1024]


class AnalyzeBatchTasksOperation(DictMixin):
    """Wraps the poller of a batch-tasks operation and mirrors its job metadata.

    The metadata attributes, including the task counters, are copied from
    ``poller.resource()`` each time :meth:`update_status` runs, which happens
    once during construction and again after :meth:`get_result` obtains the
    result.

    :keyword poller: The poller driving the operation. It must provide
        ``resource()``, ``update_status()``, ``result()`` and
        ``get_continuation_token()``.
    :ivar id: ``job_id`` of the resource reported by the poller at construction.
    :ivar created_date_time: ``created_date_time`` of the latest job metadata.
    :ivar expiration_date_time: ``expiration_date_time`` of the latest job metadata.
    :ivar last_update_date_time: ``last_update_date_time`` of the latest job metadata.
    :ivar status: ``status`` of the latest job metadata.
    :ivar tasks_completed: ``tasks.completed`` of the latest job metadata.
    :ivar tasks_failed: ``tasks.failed`` of the latest job metadata.
    :ivar tasks_in_progress: ``tasks.in_progress`` of the latest job metadata.
    :ivar total_tasks: ``tasks.total`` of the latest job metadata.
    :ivar continuation_token: Token returned by ``poller.get_continuation_token()``
        at construction.
    """

    def __init__(self, **kwargs):
        self._poller = kwargs.get("poller", None)
        self._initial_response = self._poller._polling_method._initial_response  # pylint: disable=protected-access
        self.id = self._poller.resource().job_id
        # Placeholders so the attributes exist before the first refresh below.
        self.created_date_time = None
        self.expiration_date_time = None
        self.last_update_date_time = None
        self.status = None
        self.tasks_completed = None
        self.tasks_failed = None
        self.tasks_in_progress = None
        self.total_tasks = None
        self.continuation_token = self._poller.get_continuation_token()
        self.update_status()

    def get_result(self):
        """Return ``poller.result()`` and refresh the mirrored metadata afterwards.

        :return: Whatever the wrapped poller's ``result()`` returns.
        """
        result = self._poller.result()
        # Refresh after the result is available so the attributes reflect
        # the state reported once result() has returned.
        self.update_status()
        return result

    def update_status(self):
        """Ask the poller to update its status and copy the job metadata onto this object."""
        self._poller.update_status()
        job_metadata = self._poller.resource()
        self.created_date_time = job_metadata.created_date_time
        self.expiration_date_time = job_metadata.expiration_date_time
        self.last_update_date_time = job_metadata.last_update_date_time
        self.status = job_metadata.status
        # Read the task summary once and fan it out to the flat attributes.
        tasks = job_metadata.tasks
        self.tasks_completed = tasks.completed
        self.tasks_failed = tasks.failed
        self.tasks_in_progress = tasks.in_progress
        self.total_tasks = tasks.total

class TextAnalysisResult(DictMixin):
"""TextAnalysisResult contains the results of multiple text analyses performed on a batch of documents.

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
from azure.core.paging import ItemPaged


class AnalyzeHealthcareResult(ItemPaged):
class AnalyzeHealthcareEntitiesResult(ItemPaged):
    """ItemPaged subclass that additionally exposes ``model_version`` and ``statistics``.

    :keyword model_version: Required. Removed from ``kwargs`` and stored on the instance.
    :keyword statistics: Required. Removed from ``kwargs`` and stored on the instance.
    :raises KeyError: If ``model_version`` or ``statistics`` is not supplied.
    """

    def __init__(self, *args, **kwargs):
        # Pull this class's own keywords out, in order, before delegating;
        # everything left over is forwarded unchanged to ItemPaged.
        for required in ('model_version', 'statistics'):
            setattr(self, required, kwargs.pop(required))
        super(AnalyzeHealthcareEntitiesResult, self).__init__(*args, **kwargs)


class AnalyzeResult(ItemPaged):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
_get_deserialize
)
from ._lro import TextAnalyticsOperationResourcePolling, TextAnalyticsLROPollingMethod
from ._models import AnalyzeHealthcareEntitiesOperation

if TYPE_CHECKING:
from azure.core.credentials import TokenCredential, AzureKeyCredential
Expand All @@ -49,7 +50,6 @@
EntitiesRecognitionTask,
PiiEntitiesRecognitionTask,
KeyPhraseExtractionTask,
AnalyzeHealthcareResultItem,
TextAnalysisResult
)

Expand Down Expand Up @@ -406,11 +406,11 @@ def _healthcare_result_callback(self, doc_id_order, raw_response, _, headers, sh
)

@distributed_trace
def begin_analyze_healthcare( # type: ignore
def begin_analyze_healthcare_entities( # type: ignore
self,
documents, # type: Union[List[str], List[TextDocumentInput], List[Dict[str, str]]]
**kwargs # type: Any
): # type: (...) -> LROPoller[ItemPaged[AnalyzeHealthcareResultItem]]
): # type: (...) -> AnalyzeHealthcareEntitiesOperation[ItemPaged[AnalyzeHealthcareResultItem]]
"""Analyze healthcare entities and identify relationships between these entities in a batch of documents.

Entities are associated with references that can be found in existing knowledge bases,
Expand All @@ -433,8 +433,8 @@ def begin_analyze_healthcare( # type: ignore
:keyword int polling_interval: Waiting time between two polls for LRO operations
if no Retry-After header is present. Defaults to 5 seconds.
:keyword str continuation_token: A continuation token to restart a poller from a saved state.
:return: An instance of an LROPoller. Call `result()` on the poller
object to return a list[:class:`~azure.ai.textanalytics.AnalyzeHealthcareResultItem`].
:return: An instance of an AnalyzeHealthcareEntitiesOperation. Call `get_result()` on this
object to return a list[:class:`~azure.ai.textanalytics.AnalyzeHealthcareEntitiesResultItem`].
:raises ~azure.core.exceptions.HttpResponseError or TypeError or ValueError or NotImplementedError:

.. admonition:: Example:
Expand All @@ -457,7 +457,7 @@ def begin_analyze_healthcare( # type: ignore
doc_id_order = [doc.get("id") for doc in docs]

try:
return self._client.begin_health(
poller = self._client.begin_health(
docs,
model_version=model_version,
string_index_type=self._string_code_unit,
Expand All @@ -471,28 +471,31 @@ def begin_analyze_healthcare( # type: ignore
continuation_token=continuation_token,
**kwargs
)
return AnalyzeHealthcareEntitiesOperation(poller=poller)

except ValueError as error:
if "API version v3.0 does not have operation 'begin_health'" in str(error):
raise ValueError(
"'begin_analyze_healthcare' endpoint is only available for API version v3.1-preview.3"
"'begin_analyze_healthcare_entities' method is only available for API version \
v3.1-preview.3 and up."
)
raise error

except HttpResponseError as error:
process_http_response_error(error)

def begin_cancel_analyze_healthcare( # type: ignore
def begin_cancel_analyze_healthcare_entities_operation( # type: ignore
self,
poller, # type: LROPoller[ItemPaged[AnalyzeHealthcareResultItem]]
healthcare_operation, # type: AnalyzeHealthcareEntitiesOperation[ItemPaged[AnalyzeHealthcareResultItem]]
**kwargs
):
# type: (...) -> LROPoller[None]
# type: (...) -> Union[None, LROPoller[None]]
"""Cancel an existing health operation.

:param poller: The LRO poller object associated with the health operation.
:return: An instance of an LROPoller that returns None.
:rtype: ~azure.core.polling.LROPoller[None]
:param healthcare_operation: The operation to cancel.
:return: If the operation is already in a terminal state returns None, otherwise returns an instance
of an LROPoller that returns None.
:rtype: Union[None, ~azure.core.polling.LROPoller[None]]
:raises ~azure.core.exceptions.HttpResponseError or TypeError or ValueError or NotImplementedError:

.. admonition:: Example:
Expand All @@ -505,20 +508,33 @@ def begin_cancel_analyze_healthcare( # type: ignore
:caption: Cancel an existing health operation.
"""
polling_interval = kwargs.pop("polling_interval", 5)
initial_response = getattr(poller._polling_method, "_initial_response") # pylint: disable=protected-access
operation_location = initial_response.http_response.headers["Operation-Location"]

job_id = urlparse(operation_location).path.split("/")[-1]
terminal_states = ["cancelled", "cancelling", "failed", "succeeded", "partiallyCompleted", "rejected"]
healthcare_operation.update_status()

if healthcare_operation.status in terminal_states:
print("Operation with ID '%s' is already in a terminal state and cannot be cancelled." \
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be raised as an error? Or does Python know how to mask this print statement into something the user can see?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe an error would be better. It's a little bit difficult to test every code path here since most of the time the jobs succeed quickly enough that this always happens. I could try adding larger documents to the request to see if i can get the job to run long enough to test actual cancellation.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I decided to raise the exception type Warning in the new cancellation method on the poller. I thought this type made the most sense of all the built-in exceptions, but I can change it to something else if anyone has any suggestions.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm idk what the behavior is in Python for these scenarios. @iscai-msft for context

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for python we can log warnings, usually they look like this, not Warning raises. I think if other languages are raising an error when trying to cancel a finished operation, we might as well just raise an error too though

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I got the inspiration for this from here. If there is a better exception type to use, could you make a suggestion?

% healthcare_operation.id)
return

try:
return self._client.begin_cancel_health_job(
job_id,
healthcare_operation.id,
polling=TextAnalyticsLROPollingMethod(timeout=polling_interval)
)

except ValueError as error:
if "API version v3.0 does not have operation 'begin_cancel_health_job'" in str(error):
raise ValueError(
"'begin_cancel_analyze_healthcare_entities' method is only available for API version \
v3.1-preview.3 and up."
)
raise error

except HttpResponseError as error:
process_http_response_error(error)


@distributed_trace
def extract_key_phrases( # type: ignore
self,
Expand Down Expand Up @@ -616,6 +632,9 @@ def analyze_sentiment( # type: ignore
:class:`~azure.ai.textanalytics.SentenceSentiment` objects
will have property `mined_opinions` containing the result of this analysis. Only available for
API version v3.1-preview and up.
:keyword str string_index_type: Specifies the method used to interpret string offsets. Possible values are
mssfang marked this conversation as resolved.
Show resolved Hide resolved
'UnicodeCodePoint', 'TextElements_v8', or 'Utf16CodeUnit'. The default value is 'UnicodeCodePoint'.
Only available for API version v3.1-preview and up.
:keyword str language: The 2 letter ISO 639-1 representation of language for the
entire batch. For example, use "en" for English; "es" for Spanish etc.
If not set, uses "en" for English as default. Per-document language will
Expand Down Expand Up @@ -649,9 +668,11 @@ def analyze_sentiment( # type: ignore
docs = _validate_input(documents, "language", language)
model_version = kwargs.pop("model_version", None)
show_stats = kwargs.pop("show_stats", False)
show_opinion_mining = kwargs.pop("show_opinion_mining", None)
if self._string_code_unit:
kwargs.update({"string_index_type": self._string_code_unit})
show_opinion_mining = kwargs.pop("show_opinion_mining", True)
string_index_type = kwargs.pop("string_index_type", self._string_code_unit)

if string_index_type is not None:
kwargs.update({"string_index_type": string_index_type})

if show_opinion_mining is not None:
kwargs.update({"opinion_mining": show_opinion_mining})
Expand Down