Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TextAnalytics] Implemented ExtractiveSummarization input #22791

Merged
merged 8 commits into from
Jul 22, 2021
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,13 @@ internal ExtractKeyPhrasesResultCollection() : base (default(System.Collections.
public string ModelVersion { get { throw null; } }
public Azure.AI.TextAnalytics.TextDocumentBatchStatistics Statistics { get { throw null; } }
}
// Signature listing for the extract-summary action options type.
// Member bodies here are placeholders only (getters `throw null`, setters empty);
// the documented implementation is the ExtractSummaryAction class in the Azure.AI.TextAnalytics namespace.
public partial class ExtractSummaryAction
{
public ExtractSummaryAction() { }
public int? MaxSentenceCount { get { throw null; } set { } }
public string ModelVersion { get { throw null; } set { } }
public Azure.AI.TextAnalytics.SummarySentencesOrder? OrderBy { get { throw null; } set { } }
}
public partial class HealthcareEntity
{
internal HealthcareEntity() { }
Expand Down Expand Up @@ -711,6 +718,11 @@ internal SentimentConfidenceScores() { }
public double Neutral { get { throw null; } }
public double Positive { get { throw null; } }
}
// Signature listing for the ordering applied to extracted summary sentences.
public enum SummarySentencesOrder
{
// Sentences keep the order in which they appear in the input.
Offset = 0,
// Sentences are ordered by relevance to the input, as decided by the service.
Rank = 1,
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
public readonly partial struct TargetSentiment
{
Expand All @@ -735,6 +747,7 @@ public TextAnalyticsActions() { }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.AnalyzeSentimentAction> AnalyzeSentimentActions { get { throw null; } set { } }
public string DisplayName { get { throw null; } set { } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.ExtractKeyPhrasesAction> ExtractKeyPhrasesActions { get { throw null; } set { } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.ExtractSummaryAction> ExtractSummaryActions { get { throw null; } set { } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.RecognizeEntitiesAction> RecognizeEntitiesActions { get { throw null; } set { } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.RecognizeLinkedEntitiesAction> RecognizeLinkedEntitiesActions { get { throw null; } set { } }
public System.Collections.Generic.IReadOnlyCollection<Azure.AI.TextAnalytics.RecognizePiiEntitiesAction> RecognizePiiEntitiesActions { get { throw null; } set { } }
Expand Down Expand Up @@ -811,13 +824,14 @@ public TextAnalyticsClient(System.Uri endpoint, Azure.Core.TokenCredential crede
}
public partial class TextAnalyticsClientOptions : Azure.Core.ClientOptions
{
public TextAnalyticsClientOptions(Azure.AI.TextAnalytics.TextAnalyticsClientOptions.ServiceVersion version = Azure.AI.TextAnalytics.TextAnalyticsClientOptions.ServiceVersion.V3_1) { }
public TextAnalyticsClientOptions(Azure.AI.TextAnalytics.TextAnalyticsClientOptions.ServiceVersion version = Azure.AI.TextAnalytics.TextAnalyticsClientOptions.ServiceVersion.V3_2_Preview_1) { }
public string DefaultCountryHint { get { throw null; } set { } }
public string DefaultLanguage { get { throw null; } set { } }
public enum ServiceVersion
{
V3_0 = 1,
V3_1 = 2,
V3_2_Preview_1 = 3,
}
}
[System.Runtime.InteropServices.StructLayoutAttribute(System.Runtime.InteropServices.LayoutKind.Sequential)]
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// Configurations that allow callers to specify details about how to execute
/// an Extract Summary action in a set of documents.
kinelski marked this conversation as resolved.
Show resolved Hide resolved
/// For example, set the model version, specify the order in which extracted
/// sentences are returned, and more.
/// </summary>
public class ExtractSummaryAction
{
/// <summary>
/// Creates a new <see cref="ExtractSummaryAction"/> with none of its options set.
/// Use the properties of the instance to configure how the Extract Summary action
/// runs over a set of documents, for example the model version or the order in
/// which extracted sentences are returned.
/// </summary>
public ExtractSummaryAction() { }
kinelski marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// Gets or sets the version of the text analytics model that will be used to
/// generate the result, if a specific one is wanted. For supported model
/// versions, see the operation-specific documentation, for example:
/// <see href="https://docs.microsoft.com/azure/cognitive-services/text-analytics/concepts/model-versioning#available-versions"/>.
/// </summary>
public string ModelVersion { get; set; }
kinelski marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// If set, specifies the maximum number of sentences returned in the result. Defaults to 3.
/// </summary>
public int? MaxSentenceCount { get; set; }
kinelski marked this conversation as resolved.
Show resolved Hide resolved

/// <summary>
/// If set, specifies the order in which the extracted sentences will be returned in the result. Use
/// <see cref="SummarySentencesOrder.Offset"/> to keep the original order in which the sentences appear
/// in the input. Use <see cref="SummarySentencesOrder.Rank"/> to order them according to their relevance
/// to the text input, as decided by the service. Defaults to <see cref="SummarySentencesOrder.Offset"/>.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// to the text input, as decided by the service. Defaults to <see cref="SummarySentencesOrder.Offset"/>.
/// to the document input, as decided by the service. Defaults to <see cref="SummarySentencesOrder.Offset"/>.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think we should document the default in a remarks section?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't do it in the remarks in other properties. For example, in other actions:

        /// <summary>
        /// The default value of this property is 'false'. This means, Text Analytics service logs your input text for 48 hours,
        /// solely to allow for troubleshooting issues.
        /// Setting this property to true, disables input logging and may limit our ability to investigate issues that occur.
        /// <para>
        /// Please see Cognitive Services Compliance and Privacy notes at <see href="https://aka.ms/cs-compliance"/> for additional details,
        /// and Microsoft Responsible AI principles at <see href="https://www.microsoft.com/ai/responsible-ai"/>.
        /// </para>
        /// </summary>
        /// <remarks>
        /// This property only applies for <see cref="TextAnalyticsClientOptions.ServiceVersion.V3_1"/> and up.
        /// </remarks>
        public bool? DisableServiceLogs { get; set; }

So I put it in the summary section for consistency.

/// </summary>
public SummarySentencesOrder? OrderBy { get; set; }
}
}

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

This file was deleted.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

using Azure.Core;

namespace Azure.AI.TextAnalytics
{
/// <summary>
/// The order in which extracted sentences will be returned on summary extraction.
kinelski marked this conversation as resolved.
Show resolved Hide resolved
/// </summary>
[CodeGenModel("ExtractiveSummarizationTaskParametersSortBy")]
public enum SummarySentencesOrder
{
/// <summary>
/// Keeps the original order in which the sentences appear in the input.
/// </summary>
Offset,

/// <summary>
/// Orders sentences according to their relevance to the text input, as decided by the service.
kinelski marked this conversation as resolved.
Show resolved Hide resolved
/// </summary>
Rank
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,5 +51,14 @@ public TextAnalyticsActions()
/// Note that currently only one <see cref="AnalyzeSentimentAction"/> is supported.
/// </summary>
public IReadOnlyCollection<AnalyzeSentimentAction> AnalyzeSentimentActions { get; set; }

/// <summary>
/// The set of <see cref="ExtractSummaryAction"/> that will get executed on the input documents.
/// Note that currently only one <see cref="ExtractSummaryAction"/> is supported.
/// </summary>
/// <remarks>
/// This property only applies for <see cref="TextAnalyticsClientOptions.ServiceVersion.V3_2_Preview_1"/> and up.
/// <para>
/// When this property is left <c>null</c>, no extractive summarization task is created for the operation.
/// Starting an analyze-actions operation with more than one action in this collection fails with an
/// <see cref="System.ArgumentException"/>.
/// </para>
/// </remarks>
public IReadOnlyCollection<ExtractSummaryAction> ExtractSummaryActions { get; set; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2470,6 +2470,10 @@ private static JobManifestTasks CreateTasks(TextAnalyticsActions actions)
{
tasks.SentimentAnalysisTasks = Transforms.ConvertFromAnalyzeSentimentActionsToTasks(actions.AnalyzeSentimentActions);
}
if (actions.ExtractSummaryActions != null)
{
tasks.ExtractiveSummarizationTasks = Transforms.ConvertFromExtractSummaryActionsToTasks(actions.ExtractSummaryActions);
}
return tasks;
}

Expand All @@ -2479,7 +2483,8 @@ private static void ValidateActions(TextAnalyticsActions actions)
actions.RecognizeEntitiesActions?.Count > 1 ||
actions.RecognizeLinkedEntitiesActions?.Count > 1 ||
actions.ExtractKeyPhrasesActions?.Count > 1 ||
actions.AnalyzeSentimentActions?.Count > 1)
actions.AnalyzeSentimentActions?.Count > 1 ||
actions.ExtractSummaryActions?.Count > 1)
{
throw new ArgumentException("Multiple of the same action is not currently supported.");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class TextAnalyticsClientOptions : ClientOptions
/// <summary>
/// The latest service version supported by this client library.
/// </summary>
internal const ServiceVersion LatestVersion = ServiceVersion.V3_1;
internal const ServiceVersion LatestVersion = ServiceVersion.V3_2_Preview_1;

/// <summary>
/// The versions of the Text Analytics service supported by this client library.
Expand All @@ -33,6 +33,11 @@ public enum ServiceVersion
/// Version 3.1
/// </summary>
V3_1 = 2,

/// <summary>
/// Version 3.2-preview.1
/// </summary>
V3_2_Preview_1 = 3
#pragma warning restore CA1707 // Identifiers should not contain underscores
}

Expand Down Expand Up @@ -75,6 +80,7 @@ internal static string GetVersionString(ServiceVersion version)
{
ServiceVersion.V3_0 => "v3.0",
ServiceVersion.V3_1 => "v3.1",
ServiceVersion.V3_2_Preview_1 => "v3.1",
kinelski marked this conversation as resolved.
Show resolved Hide resolved

_ => throw new ArgumentException($"Version {version} not supported."),
};
Expand Down
26 changes: 26 additions & 0 deletions sdk/textanalytics/Azure.AI.TextAnalytics/src/Transforms.cs
Original file line number Diff line number Diff line change
Expand Up @@ -393,6 +393,20 @@ internal static SentimentAnalysisTask ConvertToSentimentAnalysisTask(AnalyzeSent
};
}

/// <summary>
/// Builds the service-side <see cref="ExtractiveSummarizationTask"/> for a single
/// <see cref="ExtractSummaryAction"/>.
/// </summary>
/// <param name="action">The caller-supplied action whose options are copied onto the task parameters.</param>
/// <returns>A new task whose parameters mirror the options of <paramref name="action"/>.</returns>
internal static ExtractiveSummarizationTask ConvertToExtractiveSummarizationTask(ExtractSummaryAction action)
{
    ExtractiveSummarizationTask task = new ExtractiveSummarizationTask();

    // The string index type is not caller-configurable; the library-wide default is always sent.
    task.Parameters = new ExtractiveSummarizationTaskParameters()
    {
        ModelVersion = action.ModelVersion,
        StringIndexType = Constants.DefaultStringIndexType,
        SentenceCount = action.MaxSentenceCount,
        SortBy = action.OrderBy
    };

    return task;
}

internal static IList<EntityLinkingTask> ConvertFromRecognizeLinkedEntitiesActionsToTasks(IReadOnlyCollection<RecognizeLinkedEntitiesAction> recognizeLinkedEntitiesActions)
{
List<EntityLinkingTask> list = new List<EntityLinkingTask>();
Expand Down Expand Up @@ -453,6 +467,18 @@ internal static IList<SentimentAnalysisTask> ConvertFromAnalyzeSentimentActionsT
return list;
}

/// <summary>
/// Converts every <see cref="ExtractSummaryAction"/> in the given collection into its
/// <see cref="ExtractiveSummarizationTask"/> counterpart, preserving enumeration order.
/// </summary>
/// <param name="extractSummaryActions">The actions to convert.</param>
/// <returns>A list holding one task per input action.</returns>
internal static IList<ExtractiveSummarizationTask> ConvertFromExtractSummaryActionsToTasks(IReadOnlyCollection<ExtractSummaryAction> extractSummaryActions)
{
    List<ExtractiveSummarizationTask> tasks = new List<ExtractiveSummarizationTask>();

    foreach (ExtractSummaryAction summaryAction in extractSummaryActions)
    {
        ExtractiveSummarizationTask converted = ConvertToExtractiveSummarizationTask(summaryAction);
        tasks.Add(converted);
    }

    return tasks;
}

private static string[] parseActionErrorTarget(string targetReference)
{
if (string.IsNullOrEmpty(targetReference))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -707,6 +707,40 @@ public void AnalyzeOperationAnalyzeSentimentWithTwoActions()

#endregion Analyze sentiment

#region Extract summary

[Test]
public void AnalyzeOperationExtractsSummaryWithTwoActions()
kinelski marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would be a live test, so I can only add it when the output side is implemented. Do you have tests for long documents in all endpoints in JS, or is this something added only for extractive text summarization?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can only add it when the output side is implemented.

To clarify, you're saying processing the response received from the service is not yet implemented in the SDK? That is fine if so.

The nature of the input document depends on the action you want to apply to it, so in this case I think a long document is needed to make sure we get a useful summary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To clarify, you're saying processing the response received from the service is not yet implemented in the SDK?

Correct. We just have the "sending" part implemented in this PR.

{
var mockResponse = new MockResponse(202);
mockResponse.AddHeader(new HttpHeader("Operation-Location", "something/jobs/2a96a91f-7edf-4931-a880-3fdee1d56f15"));

var mockTransport = new MockTransport(new[] { mockResponse, mockResponse });
var client = CreateTestClient(mockTransport);

var documents = new List<string>
{
"Elon Musk is the CEO of SpaceX and Tesla."
};

TextAnalyticsActions batchActions = new()
{
ExtractSummaryActions = new List<ExtractSummaryAction>()
{
new ExtractSummaryAction(),
new ExtractSummaryAction()
{
ModelVersion = "InvalidVersion"
}
},
};

ArgumentException ex = Assert.ThrowsAsync<ArgumentException>(async () => await client.StartAnalyzeActionsAsync(documents, batchActions));
Assert.AreEqual("Multiple of the same action is not currently supported.", ex.Message);
}

#endregion Extract summary

[Test]
public async Task AnalyzeOperationWithActionsError()
{
Expand Down