Skip to content

Commit

Permalink
[Event Hubs Client] Idempotent Error Handling (#25586)
Browse files Browse the repository at this point in the history
The focus of these changes is to ensure that the AMQP connection and link
states are fully reset on any exception after retries are exhausted in order
to ensure no data loss.

In error scenarios with ambiguous outcomes, it is possible for the client and
service state for idempotent sequencing to become out-of-sync with no way to
detect or correct the problem.  When this happens, it is possible that events
being published are incorrectly identified by the service as duplicates and
their receipt acknowledged.  This leads the client to signal to callers that
publishing was successful, though the events were ignored by the service and
will never be available to be read.

By resetting the connection and forcing the client to use a new publisher
identifier, the service will reset state and perform the initial handshake
with the client, ensuring that state is properly synchronized between them.
  • Loading branch information
jsquire authored Dec 1, 2021
1 parent fd9ad86 commit 345c70e
Show file tree
Hide file tree
Showing 15 changed files with 550 additions and 67 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.29215.179
# Visual Studio Version 17
VisualStudioVersion = 17.0.31912.275
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Messaging.EventHubs.Processor", "src\Azure.Messaging.EventHubs.Processor.csproj", "{B9C4A45A-BD10-4C4C-B7BF-EE4F601AE7FC}"
EndProject
Expand All @@ -13,7 +13,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "External", "External", "{79
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Messaging.EventHubs", "..\Azure.Messaging.EventHubs\src\Azure.Messaging.EventHubs.csproj", "{87A3ED70-190D-4E6B-A568-40DF5B9F3939}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Core.Experimental", "..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj", "{0BFCCE8E-85FD-4DF7-8E5D-A6CE9ACDB175}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core", "..\..\core\Azure.Core\src\Azure.Core.csproj", "{00EBD345-7146-442E-8FC7-106FA5FC2A1A}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core.Experimental", "..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj", "{B0B9A0F1-50DE-4059-9A2D-51059D9A477C}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -37,18 +39,23 @@ Global
{87A3ED70-190D-4E6B-A568-40DF5B9F3939}.Debug|Any CPU.Build.0 = Debug|Any CPU
{87A3ED70-190D-4E6B-A568-40DF5B9F3939}.Release|Any CPU.ActiveCfg = Release|Any CPU
{87A3ED70-190D-4E6B-A568-40DF5B9F3939}.Release|Any CPU.Build.0 = Release|Any CPU
{0BFCCE8E-85FD-4DF7-8E5D-A6CE9ACDB175}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{0BFCCE8E-85FD-4DF7-8E5D-A6CE9ACDB175}.Debug|Any CPU.Build.0 = Debug|Any CPU
{0BFCCE8E-85FD-4DF7-8E5D-A6CE9ACDB175}.Release|Any CPU.ActiveCfg = Release|Any CPU
{0BFCCE8E-85FD-4DF7-8E5D-A6CE9ACDB175}.Release|Any CPU.Build.0 = Release|Any CPU
{00EBD345-7146-442E-8FC7-106FA5FC2A1A}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{00EBD345-7146-442E-8FC7-106FA5FC2A1A}.Debug|Any CPU.Build.0 = Debug|Any CPU
{00EBD345-7146-442E-8FC7-106FA5FC2A1A}.Release|Any CPU.ActiveCfg = Release|Any CPU
{00EBD345-7146-442E-8FC7-106FA5FC2A1A}.Release|Any CPU.Build.0 = Release|Any CPU
{B0B9A0F1-50DE-4059-9A2D-51059D9A477C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{B0B9A0F1-50DE-4059-9A2D-51059D9A477C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{B0B9A0F1-50DE-4059-9A2D-51059D9A477C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{B0B9A0F1-50DE-4059-9A2D-51059D9A477C}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{7DFF0E65-DC9A-410D-9A11-AD6A06860FE1} = {797FF941-76FD-45FD-AC17-A73DFE2BA621}
{87A3ED70-190D-4E6B-A568-40DF5B9F3939} = {797FF941-76FD-45FD-AC17-A73DFE2BA621}
{0BFCCE8E-85FD-4DF7-8E5D-A6CE9ACDB175} = {797FF941-76FD-45FD-AC17-A73DFE2BA621}
{00EBD345-7146-442E-8FC7-106FA5FC2A1A} = {797FF941-76FD-45FD-AC17-A73DFE2BA621}
{B0B9A0F1-50DE-4059-9A2D-51059D9A477C} = {797FF941-76FD-45FD-AC17-A73DFE2BA621}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {44BD3BD5-61DF-464D-8627-E00B0BC4B3A3}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,13 @@
</ItemGroup>

<ItemGroup>
<!--
TEMP: Move Core to a project reference until the
MessageWithMetadata are shipped in Azure.Core.
<!--
TEMP: Move Core to a project reference until the MessageWithMetadata are shipped in Azure.Core.
<PackageReference Include="Azure.Core" />
-->
<ProjectReference Include="..\..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj" />
<!-- END TEMP -->

<!--
TEMP: Move Event Hubs to a project reference until the
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.29519.87
# Visual Studio Version 17
VisualStudioVersion = 17.0.31912.275
MinimumVisualStudioVersion = 15.0.26124.0
Project("{D954291E-2A0B-460D-934E-DC6B0785DB48}") = "Azure.Messaging.EventHubs.Shared", "src\Azure.Messaging.EventHubs.Shared.shproj", "{905C204B-E043-4219-84E1-DC90139EA643}"
EndProject
Expand All @@ -13,9 +13,9 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "External", "External", "{10
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Messaging.EventHubs", "..\Azure.Messaging.EventHubs\src\Azure.Messaging.EventHubs.csproj", "{870D6ACA-B778-40AA-93B5-43CF1D25235E}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Core.Experimental", "..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj", "{07A39A33-E454-4404-85DC-42B603AEBD6D}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core.Experimental", "..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj", "{07A39A33-E454-4404-85DC-42B603AEBD6D}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Core", "..\..\core\Azure.Core\src\Azure.Core.csproj", "{E3B14DB9-48E1-4B7D-A384-3247163A1421}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core", "..\..\core\Azure.Core\src\Azure.Core.csproj", "{E3B14DB9-48E1-4B7D-A384-3247163A1421}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -94,6 +94,8 @@ Global
GlobalSection(NestedProjects) = preSolution
{1AC14D43-837B-412F-BE15-F506B351CE72} = {10D9CACA-DDF6-4A56-8633-8595F9805837}
{870D6ACA-B778-40AA-93B5-43CF1D25235E} = {10D9CACA-DDF6-4A56-8633-8595F9805837}
{07A39A33-E454-4404-85DC-42B603AEBD6D} = {10D9CACA-DDF6-4A56-8633-8595F9805837}
{E3B14DB9-48E1-4B7D-A384-3247163A1421} = {10D9CACA-DDF6-4A56-8633-8595F9805837}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {F5A1E14E-F9E2-4A0C-9EB0-F91C44298DC8}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core.TestFramework",
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "External", "External", "{2DB233D3-E757-423C-8F8D-742B0AFF4713}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core", "..\..\core\Azure.Core\src\Azure.Core.csproj", "{6C67B4C6-0ABF-4E10-8BAF-FCF2AD053DE3}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core.Experimental", "..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj", "{FB10A6B3-88DD-4296-9241-53ECDF942842}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Azure.Core.Experimental", "..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj", "{FB10A6B3-88DD-4296-9241-53ECDF942842}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Azure.Core", "..\..\core\Azure.Core\src\Azure.Core.csproj", "{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand Down Expand Up @@ -84,14 +84,35 @@ Global
{FB10A6B3-88DD-4296-9241-53ECDF942842}.Release|x64.ActiveCfg = Release|Any CPU
{FB10A6B3-88DD-4296-9241-53ECDF942842}.Release|x64.Build.0 = Release|Any CPU
{FB10A6B3-88DD-4296-9241-53ECDF942842}.Release|x86.ActiveCfg = Release|Any CPU
<<<<<<< HEAD
{FB10A6B3-88DD-4296-9241-53ECDF942842}.Release|x86.Build.0 = Release|Any CPU
=======
{FB10A6B3-88DD-4296-9241-53ECDF942842}.Release|x86.Build.0 = Release|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Debug|Any CPU.Build.0 = Debug|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Debug|x64.ActiveCfg = Debug|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Debug|x64.Build.0 = Debug|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Debug|x86.ActiveCfg = Debug|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Debug|x86.Build.0 = Debug|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Release|Any CPU.ActiveCfg = Release|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Release|Any CPU.Build.0 = Release|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Release|x64.ActiveCfg = Release|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Release|x64.Build.0 = Release|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Release|x86.ActiveCfg = Release|Any CPU
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA}.Release|x86.Build.0 = Release|Any CPU
>>>>>>> b3d9d8605d ([Event Hubs Client] Idempotent Error Handling)
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(NestedProjects) = preSolution
{2CFDB3D6-5CFB-428C-9C89-29DD169B5433} = {2DB233D3-E757-423C-8F8D-742B0AFF4713}
<<<<<<< HEAD
{6C67B4C6-0ABF-4E10-8BAF-FCF2AD053DE3} = {2DB233D3-E757-423C-8F8D-742B0AFF4713}
=======
{FB10A6B3-88DD-4296-9241-53ECDF942842} = {2DB233D3-E757-423C-8F8D-742B0AFF4713}
{79B7DF40-239D-4360-A1C3-D4F92A2E75EA} = {2DB233D3-E757-423C-8F8D-742B0AFF4713}
>>>>>>> b3d9d8605d ([Event Hubs Client] Idempotent Error Handling)
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {509F2EE0-3348-4506-8AC7-9945B602CB43}
Expand Down
2 changes: 2 additions & 0 deletions sdk/eventhub/Azure.Messaging.EventHubs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

### Bugs Fixed

- Fixed an issue for publishing with idempotent retries enabled where the client and service state could become out-of-sync for error scenarios with ambiguous outcomes. When this occurred, callers had no way to detect or correct the condition and it was possible that new events would fail to publish or be incorrectly identified as duplicates by the service.

### Other Changes

- Based on a new series of profiling and testing in real-world application scenarios, the default values for `EventProcessor<T>` load balancing are being updated to provide better performance and stability. The default load balancing interval was changed from 10 seconds to 30 seconds. The default ownership expiration interval was changed from 30 seconds to 2 minutes. The default load balancing strategy has been changed from balanced to greedy.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@

<ItemGroup>
<PackageReference Include="CommandLineParser" />
<PackageReference Include="Azure.Storage.Blobs" />
<PackageReference Include="Microsoft.Azure.Management.EventHub" />
<PackageReference Include="Microsoft.Azure.Management.ResourceManager" />
<PackageReference Include="Microsoft.Azure.Management.Storage" />
<PackageReference Include="Microsoft.Azure.Services.AppAuthentication" />
<PackageReference Include="Polly" />
</ItemGroup>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,6 @@ public override async Task<IReadOnlyList<EventData>> ReceiveAsync(int maximumEve
// again after the operation completes to provide best efforts in respecting it.

EventHubsEventSource.Log.EventReceiveStart(EventHubName, ConsumerGroup, PartitionId, operationId);
cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();

link = await ReceiveLink.GetOrCreateAsync(UseMinimum(ConnectionScope.SessionTimeout, tryTimeout)).ConfigureAwait(false);
cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();
Expand Down Expand Up @@ -417,6 +416,7 @@ public override async Task CloseAsync(CancellationToken cancellationToken)
return;
}

cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();
_closed = true;

var clientId = GetHashCode().ToString(CultureInfo.InvariantCulture);
Expand All @@ -425,7 +425,6 @@ public override async Task CloseAsync(CancellationToken cancellationToken)
try
{
EventHubsEventSource.Log.ClientCloseStart(clientType, EventHubName, clientId);
cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();

if (ReceiveLink?.TryGetOpenedObject(out var _) == true)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,6 @@ public override async ValueTask<PartitionPublishingPropertiesInternal> ReadIniti

// Initialize the properties by forcing the link to be opened.

cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();

var failedAttemptCount = 0;
var tryTimeout = RetryPolicy.CalculateTryTimeout(0);

Expand Down Expand Up @@ -392,6 +390,7 @@ public override async Task CloseAsync(CancellationToken cancellationToken)
return;
}

cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();
_closed = true;

var clientId = GetHashCode().ToString(CultureInfo.InvariantCulture);
Expand All @@ -400,7 +399,6 @@ public override async Task CloseAsync(CancellationToken cancellationToken)
try
{
EventHubsEventSource.Log.ClientCloseStart(clientType, EventHubName, clientId);
cancellationToken.ThrowIfCancellationRequested<TaskCanceledException>();

if (SendLink?.TryGetOpenedObject(out var _) == true)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@
</ItemGroup>

<ItemGroup>
<!--
TEMP: Move Core to a project reference until the
MessageWithMetadata are shipped in Azure.Core.
<PackageReference Include="Azure.Core" />
<!--
TEMP: Move Core to a project reference until the MessageWithMetadata are shipped in Azure.Core.
<PackageReference Include="Azure.Core" />
-->
<ProjectReference Include="..\..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj" />
<!-- END TEMP -->

<PackageReference Include="Azure.Core.Amqp" />
<PackageReference Include="Microsoft.Azure.Amqp" />
<PackageReference Include="Microsoft.Bcl.AsyncInterfaces" />
Expand Down Expand Up @@ -63,7 +66,4 @@
<CustomToolNamespace>Azure.Messaging.EventHubs</CustomToolNamespace>
</EmbeddedResource>
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\..\core\Azure.Core.Experimental\src\Azure.Core.Experimental.csproj" />
</ItemGroup>
</Project>
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,25 @@ public virtual PooledProducer GetPooledProducer(string partitionId,
});
}

/// <summary>
/// Expires the pooled producer for the requested partition immediately.
/// </summary>
///
/// <param name="partitionId">The unique identifier of a partition associated with the Event Hub.</param>
/// <param name="forceClose"><c>true</c> to close the pooled producer even if it is in use; otherwise, <c>false</c> will defer closing until it is no longer in use.</param>
///
public virtual async Task ExpirePooledProducerAsync(string partitionId,
bool forceClose = false)
{
if (Pool.TryRemove(partitionId, out var poolItem))
{
if ((poolItem.ActiveInstances.IsEmpty) || (forceClose))
{
await poolItem.PartitionProducer.CloseAsync(CancellationToken.None).ConfigureAwait(false);
}
}
}

/// <summary>
/// Closes the producers in the pool and performs any cleanup necessary
/// for resources used by the <see cref="TransportProducerPool" />.
Expand Down Expand Up @@ -188,6 +207,7 @@ private TimerCallback CreateExpirationTimerCallback()
return _ =>
{
// Capture the time stamp to use a consistent value.
var now = DateTimeOffset.UtcNow;
foreach (var key in Pool.Keys.ToList())
Expand All @@ -196,7 +216,7 @@ private TimerCallback CreateExpirationTimerCallback()
{
if (poolItem.RemoveAfter <= now)
{
if (Pool.TryRemove(key, out var _) && !poolItem.ActiveInstances.Any())
if (Pool.TryRemove(key, out var _) && poolItem.ActiveInstances.IsEmpty)
{
// At this point the pool item may have been closed already
// if there was a context switch between the if conditions
Expand Down
30 changes: 16 additions & 14 deletions sdk/eventhub/Azure.Messaging.EventHubs/src/EventData.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
using System.IO;
using System.Text;
using Azure.Core;
using Azure.Core.Serialization;
using Azure.Core.Amqp;
using Azure.Core.Serialization;
using Azure.Messaging.EventHubs.Amqp;
using Azure.Messaging.EventHubs.Consumer;
using Azure.Messaging.EventHubs.Producer;
Expand Down Expand Up @@ -48,17 +48,6 @@ public BinaryData EventBody
set => _amqpMessage.Body = AmqpMessageBody.FromData(MessageBody.FromReadOnlyMemorySegment(value.ToMemory()));
}

/// <summary>
/// Hidden property that shadows the <see cref="EventBody"/> property. This is added
/// in order to inherit from <see cref="MessageWithMetadata"/>.
/// </summary>
[EditorBrowsable(EditorBrowsableState.Never)]
public override BinaryData Data
{
get => EventBody;
set => EventBody = value;
}

/// <summary>
/// A MIME type describing the data contained in the <see cref="EventBody" />,
/// intended to allow consumers to make informed decisions for inspecting and
Expand Down Expand Up @@ -108,9 +97,22 @@ public override string ContentType
}

/// <summary>
/// Hidden property that indicates that the <see cref="EventData"/> is not read-only. This is part of
/// the <see cref="MessageWithMetadata"/> abstraction.
/// Hidden property that shadows the <see cref="EventBody"/> property. This is added
/// in order to inherit from <see cref="MessageWithMetadata"/>.
/// </summary>
///
[EditorBrowsable(EditorBrowsableState.Never)]
public override BinaryData Data
{
get => EventBody;
set => EventBody = value;
}

/// <summary>
/// Hidden property that indicates that the <see cref="EventData"/> is not read-only. This is part of
/// the <see cref="MessageWithMetadata"/> abstraction.
/// </summary>
///
[EditorBrowsable(EditorBrowsableState.Never)]
public override bool IsReadOnly => false;

Expand Down
Loading

0 comments on commit 345c70e

Please sign in to comment.