diff --git a/src/Orleans.Runtime/GrainDirectory/CachedGrainLocator.cs b/src/Orleans.Runtime/GrainDirectory/CachedGrainLocator.cs index 1f823a9f1e..5d28a003b4 100644 --- a/src/Orleans.Runtime/GrainDirectory/CachedGrainLocator.cs +++ b/src/Orleans.Runtime/GrainDirectory/CachedGrainLocator.cs @@ -103,18 +103,21 @@ public async Task Register(GrainAddress address, GrainAddress prev } // Cache update - this.cache.AddOrUpdate(result, (int) result.MembershipVersion.Value); + this.cache.AddOrUpdate(result, (int)result.MembershipVersion.Value); return result; } public async Task Unregister(GrainAddress address, UnregistrationCause cause) { - try - { - await GetGrainDirectory(address.GrainId.Type).Unregister(address); - } - finally + // Remove from local cache first so we don't return it anymore + this.cache.Remove(address); + + // Remove from grain directory which may take significantly longer + await GetGrainDirectory(address.GrainId.Type).Unregister(address); + + // There is the potential for a lookup to race with the Unregister and add the bad entry back to the cache. + if (this.cache.LookUp(address.GrainId, out var entry, out _) && entry == address) { this.cache.Remove(address); } diff --git a/test/NonSilo.Tests/Directory/CachedGrainLocatorTests.cs b/test/NonSilo.Tests/Directory/CachedGrainLocatorTests.cs index f538e342f0..611838ae0f 100644 --- a/test/NonSilo.Tests/Directory/CachedGrainLocatorTests.cs +++ b/test/NonSilo.Tests/Directory/CachedGrainLocatorTests.cs @@ -6,6 +6,7 @@ using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; using NSubstitute; +using NSubstitute.ReceivedExtensions; using Orleans; using Orleans.GrainDirectory; using Orleans.Metadata; @@ -363,6 +364,115 @@ public async Task UnregisterCallDirectoryAndCleanCache() Assert.False(this.grainLocator.TryLookupInCache(expectedAddr.GrainId, out _)); } + [Fact] + public async Task UnregisterRemovesFromCacheFirst() + { + var expectedSilo = GenerateSiloAddress(); + + // Setup membership service + this.mockMembershipService.UpdateSiloStatus(expectedSilo, SiloStatus.Active, "exp"); + await this.lifecycle.OnStart(); + await WaitUntilClusterChangePropagated(); + + var expectedAddr = GenerateGrainAddress(expectedSilo); + + // Give up control then Run forever + this.grainDirectory.Unregister(expectedAddr).Returns(async (t) => { await Task.Yield(); while (true) { } }); + + this.grainDirectory.Register(expectedAddr, previousAddress: null).Returns(expectedAddr); + + // Register to populate cache + await this.grainLocator.Register(expectedAddr, previousAddress: null); + + // Unregister and check if cache was cleaned + _ = this.grainLocator.Unregister(expectedAddr, UnregistrationCause.Force); + Assert.False(this.grainLocator.TryLookupInCache(expectedAddr.GrainId, out _)); + } + + [Fact] + public async Task UnregisterRacesWithLookupSameId() + { + var expectedSilo = GenerateSiloAddress(); + + // Setup membership service + this.mockMembershipService.UpdateSiloStatus(expectedSilo, SiloStatus.Active, "exp"); + await this.lifecycle.OnStart(); + await WaitUntilClusterChangePropagated(); + + var expectedAddr = GenerateGrainAddress(expectedSilo); + + ManualResetEvent mre = new ManualResetEvent(false); + + // Give up control then Run forever + this.grainDirectory.Unregister(expectedAddr).Returns(async (t) => + { + await Task.Yield(); + mre.WaitOne(); + }); + + this.grainDirectory.Register(expectedAddr, previousAddress: null).Returns(expectedAddr); + + // Register to populate cache + await this.grainLocator.Register(expectedAddr, previousAddress: null); + + // Unregister and check if cache was cleaned + Task t = this.grainLocator.Unregister(expectedAddr, UnregistrationCause.Force); + Assert.False(this.grainLocator.TryLookupInCache(expectedAddr.GrainId, out _)); + + // Add back to cache simulating a race from lookup + await this.grainLocator.Register(expectedAddr, previousAddress: null); + Assert.True(this.grainLocator.TryLookupInCache(expectedAddr.GrainId, out _)); + + // Ensure when Unregister finishes if the race occured on the same id that it was removed + mre.Set(); + await t; + Assert.False(this.grainLocator.TryLookupInCache(expectedAddr.GrainId, out _)); + } + + [Fact] + public async Task UnregisterRacesWithLookupDifferentId() + { + var expectedSilo = GenerateSiloAddress(); + var secondSilo = GenerateSiloAddress(); + + // Setup membership service + this.mockMembershipService.UpdateSiloStatus(expectedSilo, SiloStatus.Active, "exp"); + this.mockMembershipService.UpdateSiloStatus(secondSilo, SiloStatus.Active, "exp"); + await this.lifecycle.OnStart(); + await WaitUntilClusterChangePropagated(); + + var expectedAddr = GenerateGrainAddress(expectedSilo); + var secondAddr = GenerateGrainAddress(secondSilo); + + ManualResetEvent mre = new ManualResetEvent(false); + + // Give up control then Run forever + this.grainDirectory.Unregister(expectedAddr).Returns(async (t) => + { + await Task.Yield(); + mre.WaitOne(); + }); + + this.grainDirectory.Register(expectedAddr, previousAddress: null).Returns(expectedAddr); + this.grainDirectory.Register(secondAddr, previousAddress: null).Returns(secondAddr); + + // Register to populate cache + await this.grainLocator.Register(expectedAddr, previousAddress: null); + + // Unregister and check if cache was cleaned + Task t = this.grainLocator.Unregister(expectedAddr, UnregistrationCause.Force); + Assert.False(this.grainLocator.TryLookupInCache(expectedAddr.GrainId, out _)); + + // Add back to cache simulating a race from lookup + await this.grainLocator.Register(secondAddr, previousAddress: null); + Assert.True(this.grainLocator.TryLookupInCache(secondAddr.GrainId, out _)); + + // Ensure when Unregister finishes if the race occured on the same id that it was removed + mre.Set(); + await t; + Assert.True(this.grainLocator.TryLookupInCache(secondAddr.GrainId, out _)); + } + private GrainAddress GenerateGrainAddress(SiloAddress siloAddress = null) { return new GrainAddress