From b5f9e660986773571a20c9a1accc461535ba4f92 Mon Sep 17 00:00:00 2001
From: Reuben Bond <203839+ReubenBond@users.noreply.github.com>
Date: Wed, 16 Oct 2024 09:26:14 -0700
Subject: [PATCH] Strong consistency, distributed, in-memory grain directory
(#9103)
---
Directory.Packages.props | 16 +-
Orleans.sln | 24 +
.../ChaoticCluster.AppHost.csproj | 23 +
.../ChaoticCluster.AppHost/Program.cs | 10 +
.../Properties/launchSettings.json | 29 +
.../appsettings.Development.json | 8 +
.../ChaoticCluster.AppHost/appsettings.json | 9 +
.../ChaoticCluster.ServiceDefaults.csproj | 22 +
.../Extensions.cs | 111 +++
.../ChaoticCluster.Silo.csproj | 19 +
.../ChaoticCluster.Silo/Program.cs | 151 ++++
.../SiloBuilderConfigurator.cs | 26 +
.../Core/IGrainBase.cs | 8 +
.../IDs/GrainAddress.cs | 21 +-
.../Runtime/MembershipVersion.cs | 2 +-
.../Diagnostics/MessagingTrace.cs | 2 +
.../Metrics/DirectoryInstruments.cs | 50 +-
.../Diagnostics/Metrics/InstrumentNames.cs | 6 +
src/Orleans.Core/Networking/Connection.cs | 1 +
src/Orleans.Core/Runtime/Constants.cs | 3 +
src/Orleans.Core/Runtime/IRuntimeClient.cs | 2 +-
.../Runtime/OutsideRuntimeClient.cs | 2 +-
.../Statistics/EnvironmentStatistics.cs | 38 +-
.../Catalog/ActivationCollector.cs | 90 +-
src/Orleans.Runtime/Catalog/ActivationData.cs | 99 +--
.../Catalog/ActivationDirectory.cs | 5 +-
.../Catalog/ActivationWorkingSet.cs | 2 +
src/Orleans.Runtime/Catalog/Catalog.cs | 134 +--
.../Catalog}/ICatalog.cs | 1 -
.../Core/InsideRuntimeClient.cs | 2 +-
.../DirectoryMembershipService.cs | 92 +++
.../DirectoryMembershipSnapshot.cs | 306 +++++++
.../GrainDirectory/DirectoryResult.cs | 32 +
.../DistributedGrainDirectory.cs | 381 +++++++++
.../GrainDirectoryHandoffManager.cs | 4 +-
.../GrainDirectoryPartitionSnapshot.cs | 16 +
.../GrainDirectoryReplica.Interface.cs | 112 +++
.../GrainDirectory/GrainDirectoryReplica.cs | 771 ++++++++++++++++++
.../GrainDirectory/GrainDirectoryResolver.cs | 4 +-
.../GrainDirectory/GrainLocatorResolver.cs | 2 +-
.../IGrainDirectoryPartition.cs | 39 +
.../GrainDirectory/LocalGrainDirectory.cs | 4 +-
...ion.cs => LocalGrainDirectoryPartition.cs} | 8 +-
.../GrainDirectory/RemoteGrainDirectory.cs | 2 +-
.../GrainDirectory/RingRange.cs | 241 ++++++
.../GrainDirectory/RingRangeCollection.cs | 224 +++++
.../Hosting/CoreHostingExtensions.cs | 37 +-
.../Hosting/DefaultSiloServices.cs | 2 +-
.../ClusterMembershipService.cs | 10 +-
.../ClusterMembershipSnapshot.cs | 2 +
.../InMemoryMembershipTable.cs | 24 +-
.../LocalSiloHealthMonitor.cs | 1 +
.../SystemTargetBasedMembershipTable.cs | 13 +-
.../Messaging/MessageCenter.cs | 42 +-
.../Networking/GatewayInboundConnection.cs | 3 +-
.../Networking/SiloConnection.cs | 15 +-
.../Scheduler/ClosureWorkItem.cs | 24 +
.../Scheduler/SchedulerExtensions.cs | 8 +
.../Scheduler/WorkItemGroup.cs | 5 +-
src/Orleans.Runtime/Silo/SiloControl.cs | 38 +-
.../Utilities/SearchAlgorithms.cs | 94 +++
.../Utilities/StripedMpscBuffer.cs | 1 -
.../ConfigureDistributedGrainDirectory.cs | 10 +
.../InProcess/InProcessMembershipTable.cs | 2 +-
src/Orleans.TestingHost/TestCluster.cs | 12 +-
src/Orleans.TestingHost/TestClusterBuilder.cs | 1 +
.../TestClusterHostFactory.cs | 2 +-
test/DefaultCluster.Tests/ObserverTests.cs | 2 +-
test/Directory.Build.props | 4 +
.../RedisGrainDirectoryTests.cs | 2 +-
.../AzureGrainDirectoryTests.cs | 20 +-
test/Grains/TestInternalGrains/TimerGrain.cs | 42 +-
.../DirectoryMembershipSnapshotTests.cs | 123 +++
.../Directory/RingRangeCollectionTests.cs | 142 ++++
.../NonSilo.Tests/Directory/RingRangeTests.cs | 183 +++++
test/NonSilo.Tests/NonSilo.Tests.csproj | 1 +
.../OrleansTaskSchedulerBasicTests.cs | 2 +-
.../Orleans.Serialization.FSharp.Tests.fsproj | 4 +-
.../Tester/Directories/GrainDirectoryTests.cs | 254 +++---
.../ConsistentRingProviderTests_Silo.cs | 7 +-
.../DistributedGrainDirectoryTests.cs | 22 +
.../GrainDirectoryResilienceTests.cs | 182 +++++
.../GrainDirectoryPartitionTests.cs | 4 +-
.../ConsistentRingProviderTests.cs | 51 +-
.../Hosting/TransactionTestExtensions.cs | 6 +-
85 files changed, 4060 insertions(+), 491 deletions(-)
create mode 100644 playground/ChaoticCluster/ChaoticCluster.AppHost/ChaoticCluster.AppHost.csproj
create mode 100644 playground/ChaoticCluster/ChaoticCluster.AppHost/Program.cs
create mode 100644 playground/ChaoticCluster/ChaoticCluster.AppHost/Properties/launchSettings.json
create mode 100644 playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.Development.json
create mode 100644 playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.json
create mode 100644 playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/ChaoticCluster.ServiceDefaults.csproj
create mode 100644 playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/Extensions.cs
create mode 100644 playground/ChaoticCluster/ChaoticCluster.Silo/ChaoticCluster.Silo.csproj
create mode 100644 playground/ChaoticCluster/ChaoticCluster.Silo/Program.cs
create mode 100644 playground/ChaoticCluster/ChaoticCluster.Silo/SiloBuilderConfigurator.cs
rename src/{Orleans.Core/SystemTargetInterfaces => Orleans.Runtime/Catalog}/ICatalog.cs (99%)
create mode 100644 src/Orleans.Runtime/GrainDirectory/DirectoryMembershipService.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/DirectoryMembershipSnapshot.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/DirectoryResult.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/DistributedGrainDirectory.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartitionSnapshot.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.Interface.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/IGrainDirectoryPartition.cs
rename src/Orleans.Runtime/GrainDirectory/{GrainDirectoryPartition.cs => LocalGrainDirectoryPartition.cs} (97%)
create mode 100644 src/Orleans.Runtime/GrainDirectory/RingRange.cs
create mode 100644 src/Orleans.Runtime/GrainDirectory/RingRangeCollection.cs
create mode 100644 src/Orleans.Runtime/Utilities/SearchAlgorithms.cs
create mode 100644 src/Orleans.TestingHost/ConfigureDistributedGrainDirectory.cs
create mode 100644 test/NonSilo.Tests/Directory/DirectoryMembershipSnapshotTests.cs
create mode 100644 test/NonSilo.Tests/Directory/RingRangeCollectionTests.cs
create mode 100644 test/NonSilo.Tests/Directory/RingRangeTests.cs
create mode 100644 test/TesterInternal/GrainDirectory/DistributedGrainDirectoryTests.cs
create mode 100644 test/TesterInternal/GrainDirectory/GrainDirectoryResilienceTests.cs
diff --git a/Directory.Packages.props b/Directory.Packages.props
index 656513e07f..a084e077cc 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -51,10 +51,17 @@
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
@@ -68,6 +75,7 @@
+
diff --git a/Orleans.sln b/Orleans.sln
index 0df56b5714..d5e8786f4a 100644
--- a/Orleans.sln
+++ b/Orleans.sln
@@ -242,6 +242,14 @@ Project("{6EC3EE1D-3C4E-46DD-8F32-0CC8E7565705}") = "Orleans.Serialization.FShar
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Orleans.Serialization.MessagePack", "src\Orleans.Serialization.MessagePack\Orleans.Serialization.MessagePack.csproj", "{F50F81B6-E9B5-4143-B66B-A1AD913F6E9C}"
EndProject
+Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "ChaoticCluster", "ChaoticCluster", "{2579A7F6-EBE8-485A-BB20-A5D19DB5612B}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChaoticCluster.AppHost", "playground\ChaoticCluster\ChaoticCluster.AppHost\ChaoticCluster.AppHost.csproj", "{4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChaoticCluster.Silo", "playground\ChaoticCluster\ChaoticCluster.Silo\ChaoticCluster.Silo.csproj", "{76A549FA-69F1-4967-82B6-161A8B52C86B}"
+EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ChaoticCluster.ServiceDefaults", "playground\ChaoticCluster\ChaoticCluster.ServiceDefaults\ChaoticCluster.ServiceDefaults.csproj", "{4004A79F-B6BB-4472-891B-AD1348AE3E93}"
+EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TestSerializerExternalModels", "test\Misc\TestSerializerExternalModels\TestSerializerExternalModels.csproj", "{5D587DDE-036D-4694-A314-8DDF270AC031}"
EndProject
Global
@@ -634,6 +642,18 @@ Global
{F50F81B6-E9B5-4143-B66B-A1AD913F6E9C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{F50F81B6-E9B5-4143-B66B-A1AD913F6E9C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{F50F81B6-E9B5-4143-B66B-A1AD913F6E9C}.Release|Any CPU.Build.0 = Release|Any CPU
+ {4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02}.Release|Any CPU.Build.0 = Release|Any CPU
+ {76A549FA-69F1-4967-82B6-161A8B52C86B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {76A549FA-69F1-4967-82B6-161A8B52C86B}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {76A549FA-69F1-4967-82B6-161A8B52C86B}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {76A549FA-69F1-4967-82B6-161A8B52C86B}.Release|Any CPU.Build.0 = Release|Any CPU
+ {4004A79F-B6BB-4472-891B-AD1348AE3E93}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {4004A79F-B6BB-4472-891B-AD1348AE3E93}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {4004A79F-B6BB-4472-891B-AD1348AE3E93}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {4004A79F-B6BB-4472-891B-AD1348AE3E93}.Release|Any CPU.Build.0 = Release|Any CPU
{5D587DDE-036D-4694-A314-8DDF270AC031}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{5D587DDE-036D-4694-A314-8DDF270AC031}.Debug|Any CPU.Build.0 = Debug|Any CPU
{5D587DDE-036D-4694-A314-8DDF270AC031}.Release|Any CPU.ActiveCfg = Release|Any CPU
@@ -754,6 +774,10 @@ Global
{84B44F1D-B7FE-40E3-82F0-730A55AC8613} = {316CDCC7-323F-4264-9FC9-667662BB1F80}
{B2D53D3C-E44A-4C9B-AAEE-28FB8C1BDF62} = {A6573187-FD0D-4DF7-91D1-03E07E470C0A}
{F50F81B6-E9B5-4143-B66B-A1AD913F6E9C} = {4CD3AA9E-D937-48CA-BB6C-158E12257D23}
+ {2579A7F6-EBE8-485A-BB20-A5D19DB5612B} = {A41DE3D1-F8AA-4234-BE6F-3C9646A1507A}
+ {4E79EC4B-2DC4-41E3-9AE6-17C1FFF17B02} = {2579A7F6-EBE8-485A-BB20-A5D19DB5612B}
+ {76A549FA-69F1-4967-82B6-161A8B52C86B} = {2579A7F6-EBE8-485A-BB20-A5D19DB5612B}
+ {4004A79F-B6BB-4472-891B-AD1348AE3E93} = {2579A7F6-EBE8-485A-BB20-A5D19DB5612B}
{5D587DDE-036D-4694-A314-8DDF270AC031} = {70BCC54E-1618-4742-A079-07588065E361}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
diff --git a/playground/ChaoticCluster/ChaoticCluster.AppHost/ChaoticCluster.AppHost.csproj b/playground/ChaoticCluster/ChaoticCluster.AppHost/ChaoticCluster.AppHost.csproj
new file mode 100644
index 0000000000..95ad36182c
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.AppHost/ChaoticCluster.AppHost.csproj
@@ -0,0 +1,23 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+
+
+
+
+ 8cceaca4-1c1f-473f-ac3a-6f220c8791cf
+
+
+
+
+
+
+
+
+
+
+
diff --git a/playground/ChaoticCluster/ChaoticCluster.AppHost/Program.cs b/playground/ChaoticCluster/ChaoticCluster.AppHost/Program.cs
new file mode 100644
index 0000000000..a3147fd1ec
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.AppHost/Program.cs
@@ -0,0 +1,10 @@
+using Projects;
+
+var builder = DistributedApplication.CreateBuilder(args);
+
+/*
+// Uncomment this once Aspire no longer requires a 'workload' to build.
+builder.AddProject<ChaoticCluster_Silo>("silo");
+*/
+
+builder.Build().Run();
diff --git a/playground/ChaoticCluster/ChaoticCluster.AppHost/Properties/launchSettings.json b/playground/ChaoticCluster/ChaoticCluster.AppHost/Properties/launchSettings.json
new file mode 100644
index 0000000000..de31dd2521
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.AppHost/Properties/launchSettings.json
@@ -0,0 +1,29 @@
+{
+ "$schema": "https://json.schemastore.org/launchsettings.json",
+ "profiles": {
+ "https": {
+ "commandName": "Project",
+ "dotnetRunMessages": true,
+ "launchBrowser": true,
+ "applicationUrl": "https://localhost:17213;http://localhost:15139",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development",
+ "DOTNET_ENVIRONMENT": "Development",
+ "DOTNET_DASHBOARD_OTLP_ENDPOINT_URL": "https://localhost:21045",
+ "DOTNET_RESOURCE_SERVICE_ENDPOINT_URL": "https://localhost:22043"
+ }
+ },
+ "http": {
+ "commandName": "Project",
+ "dotnetRunMessages": true,
+ "launchBrowser": true,
+ "applicationUrl": "http://localhost:15139",
+ "environmentVariables": {
+ "ASPNETCORE_ENVIRONMENT": "Development",
+ "DOTNET_ENVIRONMENT": "Development",
+ "DOTNET_DASHBOARD_OTLP_ENDPOINT_URL": "http://localhost:19150",
+ "DOTNET_RESOURCE_SERVICE_ENDPOINT_URL": "http://localhost:20085"
+ }
+ }
+ }
+}
diff --git a/playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.Development.json b/playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.Development.json
new file mode 100644
index 0000000000..0c208ae918
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.Development.json
@@ -0,0 +1,8 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning"
+ }
+ }
+}
diff --git a/playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.json b/playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.json
new file mode 100644
index 0000000000..31c092aa45
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.AppHost/appsettings.json
@@ -0,0 +1,9 @@
+{
+ "Logging": {
+ "LogLevel": {
+ "Default": "Information",
+ "Microsoft.AspNetCore": "Warning",
+ "Aspire.Hosting.Dcp": "Warning"
+ }
+ }
+}
diff --git a/playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/ChaoticCluster.ServiceDefaults.csproj b/playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/ChaoticCluster.ServiceDefaults.csproj
new file mode 100644
index 0000000000..2388aea655
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/ChaoticCluster.ServiceDefaults.csproj
@@ -0,0 +1,22 @@
+
+
+
+ net8.0
+ enable
+ enable
+ true
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/Extensions.cs b/playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/Extensions.cs
new file mode 100644
index 0000000000..29dcb42871
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.ServiceDefaults/Extensions.cs
@@ -0,0 +1,111 @@
+using Microsoft.AspNetCore.Builder;
+using Microsoft.AspNetCore.Diagnostics.HealthChecks;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Diagnostics.HealthChecks;
+using Microsoft.Extensions.Logging;
+using OpenTelemetry;
+using OpenTelemetry.Metrics;
+using OpenTelemetry.Trace;
+
+namespace Microsoft.Extensions.Hosting;
+// Adds common .NET Aspire services: service discovery, resilience, health checks, and OpenTelemetry.
+// This project should be referenced by each service project in your solution.
+// To learn more about using this project, see https://aka.ms/dotnet/aspire/service-defaults
+public static class Extensions
+{
+ public static IHostApplicationBuilder AddServiceDefaults(this IHostApplicationBuilder builder)
+ {
+ builder.ConfigureOpenTelemetry();
+
+ builder.AddDefaultHealthChecks();
+
+ builder.Services.AddServiceDiscovery();
+
+ builder.Services.ConfigureHttpClientDefaults(http =>
+ {
+ // Turn on resilience by default
+ http.AddStandardResilienceHandler();
+
+ // Turn on service discovery by default
+ http.AddServiceDiscovery();
+ });
+
+ // Uncomment the following to restrict the allowed schemes for service discovery.
+ // builder.Services.Configure<ServiceDiscoveryOptions>(options =>
+ // {
+ // options.AllowedSchemes = ["https"];
+ // });
+
+ return builder;
+ }
+ /// <summary>Adds OpenTelemetry logging (formatted messages and scopes included) and metrics (ASP.NET Core, HttpClient, runtime, and the "System.Runtime" and "Microsoft.Orleans" meters), then registers exporters.</summary>
+ public static IHostApplicationBuilder ConfigureOpenTelemetry(this IHostApplicationBuilder builder)
+ {
+ builder.Logging.AddOpenTelemetry(logging =>
+ {
+ logging.IncludeFormattedMessage = true;
+ logging.IncludeScopes = true;
+ });
+
+ builder.Services.AddOpenTelemetry()
+ .WithMetrics(metrics =>
+ {
+ metrics.AddAspNetCoreInstrumentation()
+ .AddHttpClientInstrumentation()
+ .AddRuntimeInstrumentation()
+ .AddMeter("System.Runtime")
+ .AddMeter("Microsoft.Orleans");
+ });
+
+ builder.AddOpenTelemetryExporters();
+
+ return builder;
+ }
+ /// <summary>Enables the OTLP exporter only when the "OTEL_EXPORTER_OTLP_ENDPOINT" configuration value is set to a non-blank string.</summary>
+ private static IHostApplicationBuilder AddOpenTelemetryExporters(this IHostApplicationBuilder builder)
+ {
+ var useOtlpExporter = !string.IsNullOrWhiteSpace(builder.Configuration["OTEL_EXPORTER_OTLP_ENDPOINT"]);
+
+ if (useOtlpExporter)
+ {
+ builder.Services.AddOpenTelemetry().UseOtlpExporter();
+ }
+
+ // Uncomment the following lines to enable the Azure Monitor exporter (requires the Azure.Monitor.OpenTelemetry.AspNetCore package)
+ //if (!string.IsNullOrEmpty(builder.Configuration["APPLICATIONINSIGHTS_CONNECTION_STRING"]))
+ //{
+ // builder.Services.AddOpenTelemetry()
+ // .UseAzureMonitor();
+ //}
+
+ return builder;
+ }
+ /// <summary>Registers an always-healthy "self" health check tagged "live".</summary>
+ public static IHostApplicationBuilder AddDefaultHealthChecks(this IHostApplicationBuilder builder)
+ {
+ builder.Services.AddHealthChecks()
+ // Add a default liveness check to ensure app is responsive
+ .AddCheck("self", () => HealthCheckResult.Healthy(), ["live"]);
+
+ return builder;
+ }
+ /// <summary>Maps the "/health" and "/alive" health check endpoints, but only when running in the Development environment.</summary>
+ public static WebApplication MapDefaultEndpoints(this WebApplication app)
+ {
+ // Adding health checks endpoints to applications in non-development environments has security implications.
+ // See https://aka.ms/dotnet/aspire/healthchecks for details before enabling these endpoints in non-development environments.
+ if (app.Environment.IsDevelopment())
+ {
+ // All health checks must pass for app to be considered ready to accept traffic after starting
+ app.MapHealthChecks("/health");
+
+ // Only health checks tagged with the "live" tag must pass for app to be considered alive
+ app.MapHealthChecks("/alive", new HealthCheckOptions
+ {
+ Predicate = r => r.Tags.Contains("live")
+ });
+ }
+
+ return app;
+ }
+}
diff --git a/playground/ChaoticCluster/ChaoticCluster.Silo/ChaoticCluster.Silo.csproj b/playground/ChaoticCluster/ChaoticCluster.Silo/ChaoticCluster.Silo.csproj
new file mode 100644
index 0000000000..6dfb9074aa
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.Silo/ChaoticCluster.Silo.csproj
@@ -0,0 +1,19 @@
+
+
+
+ Exe
+ net8.0
+ enable
+ enable
+ true
+ true
+
+
+
+
+
+
+
+
+
+
diff --git a/playground/ChaoticCluster/ChaoticCluster.Silo/Program.cs b/playground/ChaoticCluster/ChaoticCluster.Silo/Program.cs
new file mode 100644
index 0000000000..01869077fd
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.Silo/Program.cs
@@ -0,0 +1,151 @@
+using System.Diagnostics;
+using ChaoticCluster.Silo;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Hosting;
+using Microsoft.Extensions.Logging;
+using Orleans.TestingHost;
+
+// Outer host: its logger providers (set up by AddServiceDefaults) are shared with the in-process silos and client.
+var builder = Host.CreateApplicationBuilder(args);
+builder.AddServiceDefaults(); // Configure OTel
+using var app = builder.Build();
+await app.StartAsync();
+
+// Forwards every logger provider registered on the outer host to an in-process silo or client host.
+void ForwardLoggerProviders(ILoggingBuilder logging)
+{
+    foreach (var provider in app.Services.GetServices<ILoggerProvider>())
+    {
+        logging.AddProvider(provider);
+    }
+}
+
+// NOTE(review): the constructor argument (1) is presumably the initial silo count - confirm.
+var testClusterBuilder = new InProcessTestClusterBuilder(1);
+testClusterBuilder.ConfigureSilo((options, siloBuilder) => new SiloBuilderConfigurator().Configure(siloBuilder));
+testClusterBuilder.ConfigureSiloHost((options, hostBuilder) => ForwardLoggerProviders(hostBuilder.Logging));
+testClusterBuilder.ConfigureClientHost(hostBuilder => ForwardLoggerProviders(hostBuilder.Logging));
+
+var testCluster = testClusterBuilder.Build();
+await testCluster.DeployAsync();
+var log = testCluster.Client.ServiceProvider.GetRequiredService<ILogger<Program>>();
+log.LogInformation($"ServiceId: {testCluster.Options.ServiceId}");
+log.LogInformation($"ClusterId: {testCluster.Options.ClusterId}");
+
+// Shared state for the load and chaos loops that follow.
+// 'target' flips between upperLimit and lowerLimit whenever the silo count reaches one of them.
+var cts = new CancellationTokenSource(TimeSpan.FromMinutes(15)); // Cancels itself after 15 minutes.
+var reconfigurationTimer = Stopwatch.StartNew();
+var upperLimit = 10;
+var lowerLimit = 1; // Membership is kept on the primary, so we can't go below 1
+var target = upperLimit;
+var idBase = 0L;
+var client = testCluster.Silos[0].ServiceProvider.GetRequiredService<IGrainFactory>();
+const int CallsPerIteration = 100;
+const int MaxGrains = 524_288; // 2**19;
+
+// Load generator: each pass pings CallsPerIteration grains, sliding a window of ids through [0, MaxGrains).
+var loadTask = Task.Run(async () =>
+{
+    while (!cts.IsCancellationRequested)
+    {
+        var tasks = Enumerable.Range(0, CallsPerIteration).Select(i => client.GetGrain<IMyTestGrain>((idBase + i) % MaxGrains).Ping().AsTask()).ToList();
+        var workTask = Task.WhenAll(tasks);
+        using var delayCancellation = new CancellationTokenSource();
+        var delayTask = Task.Delay(TimeSpan.FromMilliseconds(90_000), delayCancellation.Token);
+        await Task.WhenAny(workTask, delayTask);
+
+        // NOTE: workTask is awaited below even when the delay elapsed first, so the delay does not bound the wait.
+        try
+        {
+            await workTask;
+        }
+        catch (SiloUnavailableException sue)
+        {
+            log.LogInformation(sue, "Swallowed transient exception.");
+        }
+        catch (OrleansMessageRejectionException omre)
+        {
+            log.LogInformation(omre, "Swallowed rejection.");
+        }
+        catch (Exception exception)
+        {
+            log.LogError(exception, "Unhandled exception.");
+            throw;
+        }
+
+        delayCancellation.Cancel(); // Releases the pending delay.
+        idBase += CallsPerIteration;
+    }
+});
+
+// Chaos loop: about every 10 seconds, start, stop, or kill one silo to move the silo count towards 'target'.
+var chaosTask = Task.Run(async () =>
+{
+    var clusterOperation = Task.CompletedTask;
+    while (!cts.IsCancellationRequested)
+    {
+        try
+        {
+            var remaining = TimeSpan.FromSeconds(10) - reconfigurationTimer.Elapsed;
+            if (remaining > TimeSpan.Zero)
+            {
+                await Task.Delay(remaining);
+                continue;
+            }
+
+            // Detach the previous operation before awaiting it: a faulted task rethrows on every await,
+            // so leaving it in place would make each later pass fail here and never start a new operation.
+            reconfigurationTimer.Restart();
+            var previousOperation = clusterOperation;
+            clusterOperation = Task.CompletedTask;
+            await previousOperation;
+
+            clusterOperation = Task.Run(async () =>
+            {
+                var currentCount = testCluster.Silos.Count;
+                if (currentCount > target)
+                {
+                    // Stop or kill a random silo, but not the primary (since that hosts cluster membership).
+                    // Random.Next's upper bound is exclusive, so pass Count to keep the last silo eligible.
+                    var victim = testCluster.Silos[Random.Shared.Next(1, testCluster.Silos.Count)];
+                    if (currentCount % 2 == 0)
+                    {
+                        log.LogInformation($"Stopping '{victim.SiloAddress}'.");
+                        await testCluster.StopSiloAsync(victim);
+                        log.LogInformation($"Stopped '{victim.SiloAddress}'.");
+                    }
+                    else
+                    {
+                        log.LogInformation($"Killing '{victim.SiloAddress}'.");
+                        await testCluster.KillSiloAsync(victim);
+                        log.LogInformation($"Killed '{victim.SiloAddress}'.");
+                    }
+                }
+                else if (currentCount < target)
+                {
+                    log.LogInformation("Starting new silo.");
+                    var result = await testCluster.StartAdditionalSiloAsync();
+                    log.LogInformation($"Started '{result.SiloAddress}'.");
+                }
+
+                // Turn around once the count seen at the start of this operation reached a limit.
+                target = currentCount <= lowerLimit ? upperLimit
+                    : currentCount >= upperLimit ? lowerLimit
+                    : target;
+            });
+        }
+        catch (Exception exception)
+        {
+            log.LogInformation(exception, "Ignoring chaos exception.");
+        }
+    }
+});
+
+await await Task.WhenAny(loadTask, chaosTask); // The second await observes whichever loop finished first and rethrows if it faulted.
+cts.Cancel(); // Ask the loop that is still running to exit.
+await Task.WhenAll(loadTask, chaosTask);
+await testCluster.StopAllSilosAsync();
+await testCluster.DisposeAsync();
+
+await app.StopAsync();
\ No newline at end of file
diff --git a/playground/ChaoticCluster/ChaoticCluster.Silo/SiloBuilderConfigurator.cs b/playground/ChaoticCluster/ChaoticCluster.Silo/SiloBuilderConfigurator.cs
new file mode 100644
index 0000000000..aac181b83f
--- /dev/null
+++ b/playground/ChaoticCluster/ChaoticCluster.Silo/SiloBuilderConfigurator.cs
@@ -0,0 +1,26 @@
+using Microsoft.Extensions.DependencyInjection;
+using Orleans.Configuration;
+using Orleans.TestingHost;
+
+namespace ChaoticCluster.Silo;
+
+/// <summary>
+/// Enables the distributed grain directory on the silo being configured.
+/// </summary>
+internal class SiloBuilderConfigurator : ISiloConfigurator
+{
+#pragma warning disable ORLEANSEXP002 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+    public void Configure(ISiloBuilder siloBuilder) => siloBuilder.AddDistributedGrainDirectory();
+#pragma warning restore ORLEANSEXP002
+}
+
+internal interface IMyTestGrain : IGrainWithIntegerKey // Integer-keyed test grain.
+{
+ ValueTask Ping(); // Takes no arguments and returns no value.
+}
+
+[CollectionAgeLimit(Minutes = 1.01)] // Collection age limit of just over one minute for this grain class.
+internal class MyTestGrain : Grain, IMyTestGrain
+{
+    public ValueTask Ping() => ValueTask.CompletedTask; // Completes synchronously; does no work.
+}
diff --git a/src/Orleans.Core.Abstractions/Core/IGrainBase.cs b/src/Orleans.Core.Abstractions/Core/IGrainBase.cs
index a0145673f8..25b806c5b3 100644
--- a/src/Orleans.Core.Abstractions/Core/IGrainBase.cs
+++ b/src/Orleans.Core.Abstractions/Core/IGrainBase.cs
@@ -299,4 +299,12 @@ public enum DeactivationReasonCode : byte
///
Migrating,
}
+
+    /// <summary>Classification helpers for <see cref="DeactivationReasonCode"/> values.</summary>
+    internal static class DeactivationReasonCodeExtensions
+    {
+        /// <summary>Returns <see langword="true"/> only for <see cref="DeactivationReasonCode.DirectoryFailure"/>.</summary>
+        public static bool IsTransientError(this DeactivationReasonCode reasonCode)
+            => reasonCode == DeactivationReasonCode.DirectoryFailure;
+    }
}
diff --git a/src/Orleans.Core.Abstractions/IDs/GrainAddress.cs b/src/Orleans.Core.Abstractions/IDs/GrainAddress.cs
index 94c1ab9a9d..4224b88425 100644
--- a/src/Orleans.Core.Abstractions/IDs/GrainAddress.cs
+++ b/src/Orleans.Core.Abstractions/IDs/GrainAddress.cs
@@ -1,4 +1,6 @@
using System;
+using System.Diagnostics.CodeAnalysis;
+using System.Runtime.CompilerServices;
using System.Text.Json.Serialization;
using Orleans.GrainDirectory;
@@ -46,8 +48,9 @@ public sealed class GrainAddress : IEquatable, ISpanFormattable
public bool Equals(GrainAddress? other)
{
- return other != null && (SiloAddress?.Equals(other.SiloAddress) ?? other.SiloAddress is null)
- && _grainId.Equals(other._grainId) && _activationId.Equals(other._activationId) && MembershipVersion == other.MembershipVersion;
+ if (ReferenceEquals(this, other)) return true; // Same instance: equal without comparing fields.
+ return MatchesGrainIdAndSilo(this, other) // False when other is null.
+ && _activationId.Equals(other._activationId); // MembershipVersion is no longer part of equality.
}
///
@@ -56,15 +59,21 @@ public bool Equals(GrainAddress? other)
///
/// The other to compare this one with.
/// Returns true if the two are considered to match.
-public bool Matches(GrainAddress other)
+public bool Matches(GrainAddress? other)
{
- return other is not null && _grainId.Equals(other._grainId) && (SiloAddress?.Equals(other.SiloAddress) ?? other.SiloAddress is null)
+ if (ReferenceEquals(this, other)) return true; // The same instance always matches.
+ return MatchesGrainIdAndSilo(this, other) // Requires a non-null other with the same grain id and silo.
&& (_activationId.IsDefault || other._activationId.IsDefault || _activationId.Equals(other._activationId));
}
-internal static bool MatchesGrainIdAndSilo(GrainAddress address, GrainAddress other)
+[MethodImpl(MethodImplOptions.AggressiveInlining)]
+internal static bool MatchesGrainIdAndSilo([NotNullWhen(true)] GrainAddress? address, [NotNullWhen(true)] GrainAddress? other)
{
- return other is not null && address.GrainId.Equals(other.GrainId) && (address.SiloAddress?.Equals(other.SiloAddress) ?? other.SiloAddress is null);
+ return other is not null
+ && address is not null // A null on either side never matches.
+ && address.GrainId.Equals(other.GrainId)
+ && !(address.SiloAddress is null ^ other.SiloAddress is null) // Silo addresses must be both null or both non-null.
+ && (address.SiloAddress is null || address.SiloAddress.Equals(other.SiloAddress)); // When non-null, they must be equal.
}
public override int GetHashCode() => HashCode.Combine(SiloAddress, _grainId, _activationId);
diff --git a/src/Orleans.Core.Abstractions/Runtime/MembershipVersion.cs b/src/Orleans.Core.Abstractions/Runtime/MembershipVersion.cs
index f3df4e2476..98f47a291f 100644
--- a/src/Orleans.Core.Abstractions/Runtime/MembershipVersion.cs
+++ b/src/Orleans.Core.Abstractions/Runtime/MembershipVersion.cs
@@ -44,7 +44,7 @@ public MembershipVersion(long version)
public override int GetHashCode() => this.Value.GetHashCode();
///
-public override string ToString() => this.Value.ToString();
+public override string ToString() => Value != MinValue.Value ? $"{Value}" : "default"; // Prints "default" when Value equals MinValue.Value, otherwise the number.
///
/// Compares the provided operands for equality.
diff --git a/src/Orleans.Core/Diagnostics/MessagingTrace.cs b/src/Orleans.Core/Diagnostics/MessagingTrace.cs
index aaefa946cf..9f44a421a9 100644
--- a/src/Orleans.Core/Diagnostics/MessagingTrace.cs
+++ b/src/Orleans.Core/Diagnostics/MessagingTrace.cs
@@ -1,5 +1,7 @@
using System;
using System.Diagnostics;
+using System.Globalization;
+using System.IO;
using System.Runtime.CompilerServices;
using Microsoft.Extensions.Logging;
diff --git a/src/Orleans.Core/Diagnostics/Metrics/DirectoryInstruments.cs b/src/Orleans.Core/Diagnostics/Metrics/DirectoryInstruments.cs
index 9bc752ea63..e59e3a9dde 100644
--- a/src/Orleans.Core/Diagnostics/Metrics/DirectoryInstruments.cs
+++ b/src/Orleans.Core/Diagnostics/Metrics/DirectoryInstruments.cs
@@ -5,21 +5,27 @@ namespace Orleans.Runtime;
internal static class DirectoryInstruments
{
- internal static Counter LookupsLocalIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCAL_ISSUED);
- internal static Counter LookupsLocalSuccesses = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCAL_SUCCESSES);
+ internal static readonly Counter LookupsLocalIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCAL_ISSUED);
+ internal static readonly Counter LookupsLocalSuccesses = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCAL_SUCCESSES);
- internal static Counter LookupsFullIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_FULL_ISSUED);
+ internal static readonly Counter LookupsFullIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_FULL_ISSUED);
- internal static Counter LookupsRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_REMOTE_SENT);
- internal static Counter LookupsRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_REMOTE_RECEIVED);
+ internal static readonly Counter LookupsRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_REMOTE_SENT);
+ internal static readonly Counter LookupsRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_REMOTE_RECEIVED);
- internal static Counter LookupsLocalDirectoryIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCALDIRECTORY_ISSUED);
- internal static Counter LookupsLocalDirectorySuccesses = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCALDIRECTORY_SUCCESSES);
+ internal static readonly Counter LookupsLocalDirectoryIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCALDIRECTORY_ISSUED);
+ internal static readonly Counter LookupsLocalDirectorySuccesses = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_LOCALDIRECTORY_SUCCESSES);
- internal static Counter LookupsCacheIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_CACHE_ISSUED);
- internal static Counter LookupsCacheSuccesses = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_CACHE_SUCCESSES);
- internal static Counter ValidationsCacheSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_VALIDATIONS_CACHE_SENT);
- internal static Counter ValidationsCacheReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_VALIDATIONS_CACHE_RECEIVED);
+ internal static readonly Counter LookupsCacheIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_CACHE_ISSUED);
+ internal static readonly Counter LookupsCacheSuccesses = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_LOOKUPS_CACHE_SUCCESSES);
+ internal static readonly Counter ValidationsCacheSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_VALIDATIONS_CACHE_SENT);
+ internal static readonly Counter ValidationsCacheReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_VALIDATIONS_CACHE_RECEIVED);
+
+ internal static readonly Counter SnapshotTransferCount = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_RANGE_SNAPSHOT_TRANSFER_COUNT);
+ internal static readonly Histogram SnapshotTransferDuration = Instruments.Meter.CreateHistogram(InstrumentNames.DIRECTORY_RANGE_SNAPSHOT_TRANSFER_DURATION);
+ internal static readonly Counter RangeRecoveryCount = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_RANGE_RECOVERY_COUNT);
+ internal static readonly Histogram RangeRecoveryDuration = Instruments.Meter.CreateHistogram(InstrumentNames.DIRECTORY_RANGE_RECOVERY_DURATION);
+ internal static readonly Histogram RangeLockHeldDuration = Instruments.Meter.CreateHistogram(InstrumentNames.DIRECTORY_RANGE_LOCK_HELD_DURATION);
internal static ObservableGauge DirectoryPartitionSize;
internal static void RegisterDirectoryPartitionSizeObserve(Func observeValue)
@@ -57,15 +63,15 @@ internal static void RegisterMyPortionAverageRingPercentageObserve(Func o
MyPortionAverageRingPercentage = Instruments.Meter.CreateObservableGauge(InstrumentNames.DIRECTORY_RING_MYPORTION_AVERAGERINGPERCENTAGE, observeValue);
}
- internal static Counter RegistrationsSingleActIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_ISSUED);
- internal static Counter RegistrationsSingleActLocal = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_LOCAL);
- internal static Counter RegistrationsSingleActRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_REMOTE_SENT);
- internal static Counter RegistrationsSingleActRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_REMOTE_RECEIVED);
- internal static Counter UnregistrationsIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_ISSUED);
- internal static Counter UnregistrationsLocal = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_LOCAL);
- internal static Counter UnregistrationsRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_REMOTE_SENT);
- internal static Counter UnregistrationsRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_REMOTE_RECEIVED);
- internal static Counter UnregistrationsManyIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_MANY_ISSUED);
- internal static Counter UnregistrationsManyRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_MANY_REMOTE_SENT);
- internal static Counter UnregistrationsManyRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_MANY_REMOTE_RECEIVED);
+ internal static readonly Counter RegistrationsSingleActIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_ISSUED);
+ internal static readonly Counter RegistrationsSingleActLocal = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_LOCAL);
+ internal static readonly Counter RegistrationsSingleActRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_REMOTE_SENT);
+ internal static readonly Counter RegistrationsSingleActRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_REGISTRATIONS_SINGLE_ACT_REMOTE_RECEIVED);
+ internal static readonly Counter UnregistrationsIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_ISSUED);
+ internal static readonly Counter UnregistrationsLocal = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_LOCAL);
+ internal static readonly Counter UnregistrationsRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_REMOTE_SENT);
+ internal static readonly Counter UnregistrationsRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_REMOTE_RECEIVED);
+ internal static readonly Counter UnregistrationsManyIssued = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_MANY_ISSUED);
+ internal static readonly Counter UnregistrationsManyRemoteSent = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_MANY_REMOTE_SENT);
+ internal static readonly Counter UnregistrationsManyRemoteReceived = Instruments.Meter.CreateCounter(InstrumentNames.DIRECTORY_UNREGISTRATIONS_MANY_REMOTE_RECEIVED);
}
diff --git a/src/Orleans.Core/Diagnostics/Metrics/InstrumentNames.cs b/src/Orleans.Core/Diagnostics/Metrics/InstrumentNames.cs
index f40fe71ff8..cf1a8ec643 100644
--- a/src/Orleans.Core/Diagnostics/Metrics/InstrumentNames.cs
+++ b/src/Orleans.Core/Diagnostics/Metrics/InstrumentNames.cs
@@ -83,6 +83,12 @@ internal static class InstrumentNames
public const string DIRECTORY_UNREGISTRATIONS_MANY_REMOTE_SENT = "orleans-directory-unregistrations-many-remote-sent";
public const string DIRECTORY_UNREGISTRATIONS_MANY_REMOTE_RECEIVED = "orleans-directory-unregistrations-many-remote-received";
+ public const string DIRECTORY_RANGE_SNAPSHOT_TRANSFER_COUNT = "orleans-directory-snapshot-transfer-count";
+ public const string DIRECTORY_RANGE_SNAPSHOT_TRANSFER_DURATION = "orleans-directory-snapshot-transfer-duration";
+ public const string DIRECTORY_RANGE_RECOVERY_COUNT = "orleans-directory-recovery-count";
+ public const string DIRECTORY_RANGE_RECOVERY_DURATION = "orleans-directory-recovery-duration";
+ public const string DIRECTORY_RANGE_LOCK_HELD_DURATION = "orleans-directory-range-lock-held-duration";
+
// ConsistentRing
public const string CONSISTENTRING_SIZE = "orleans-consistent-ring-size";
public const string CONSISTENTRING_LOCAL_SIZE_PERCENTAGE = "orleans-consistent-ring-range-percentage-local";
diff --git a/src/Orleans.Core/Networking/Connection.cs b/src/Orleans.Core/Networking/Connection.cs
index 9892458d3c..88e52018b5 100644
--- a/src/Orleans.Core/Networking/Connection.cs
+++ b/src/Orleans.Core/Networking/Connection.cs
@@ -58,6 +58,7 @@ protected Connection(
this.LocalEndPoint = NormalizeEndpoint(this.Context.LocalEndPoint);
}
+ public ConnectionCommon Shared => shared;
public string ConnectionId => this.Context?.ConnectionId;
public virtual EndPoint RemoteEndPoint { get; }
public virtual EndPoint LocalEndPoint { get; }
diff --git a/src/Orleans.Core/Runtime/Constants.cs b/src/Orleans.Core/Runtime/Constants.cs
index 39aaa6ac12..198908e73a 100644
--- a/src/Orleans.Core/Runtime/Constants.cs
+++ b/src/Orleans.Core/Runtime/Constants.cs
@@ -26,6 +26,8 @@ internal static class Constants
public static readonly GrainType ManifestProviderType = SystemTargetGrainId.CreateGrainType("manifest");
public static readonly GrainType ActivationMigratorType = SystemTargetGrainId.CreateGrainType("migrator");
public static readonly GrainType ActivationRepartitionerType = SystemTargetGrainId.CreateGrainType("repartitioner");
+ public static readonly GrainType GrainDirectoryPartition = SystemTargetGrainId.CreateGrainType("dir.grain.part");
+ public static readonly GrainType GrainDirectory = SystemTargetGrainId.CreateGrainType("dir.grain");
public static readonly GrainId SiloDirectConnectionId = GrainId.Create(
GrainType.Create(GrainTypePrefix.SystemPrefix + "silo"),
@@ -53,6 +55,7 @@ internal static class Constants
{ManifestProviderType, "ManifestProvider"},
{ActivationMigratorType, "ActivationMigrator"},
{ActivationRepartitionerType, "ActivationRepartitioner"},
+ {GrainDirectory, "GrainDirectory"},
}.ToFrozenDictionary();
public static string SystemTargetName(GrainType id) => SingletonSystemTargetNames.TryGetValue(id, out var name) ? name : id.ToString();
diff --git a/src/Orleans.Core/Runtime/IRuntimeClient.cs b/src/Orleans.Core/Runtime/IRuntimeClient.cs
index 768684c955..bc18406f24 100644
--- a/src/Orleans.Core/Runtime/IRuntimeClient.cs
+++ b/src/Orleans.Core/Runtime/IRuntimeClient.cs
@@ -56,7 +56,7 @@ internal interface IRuntimeClient
IGrainReferenceRuntime GrainReferenceRuntime { get; }
- void BreakOutstandingMessagesToDeadSilo(SiloAddress deadSilo);
+ void BreakOutstandingMessagesToSilo(SiloAddress deadSilo);
// For testing purposes only.
int GetRunningRequestsCount(GrainInterfaceType grainInterfaceType);
diff --git a/src/Orleans.Core/Runtime/OutsideRuntimeClient.cs b/src/Orleans.Core/Runtime/OutsideRuntimeClient.cs
index d9dff1864b..bf37494993 100644
--- a/src/Orleans.Core/Runtime/OutsideRuntimeClient.cs
+++ b/src/Orleans.Core/Runtime/OutsideRuntimeClient.cs
@@ -397,7 +397,7 @@ public void Dispose()
disposed = true;
}
- public void BreakOutstandingMessagesToDeadSilo(SiloAddress deadSilo)
+ public void BreakOutstandingMessagesToSilo(SiloAddress deadSilo)
{
foreach (var callback in callbacks)
{
diff --git a/src/Orleans.Core/Statistics/EnvironmentStatistics.cs b/src/Orleans.Core/Statistics/EnvironmentStatistics.cs
index 3a878f3e77..24c3ecc758 100644
--- a/src/Orleans.Core/Statistics/EnvironmentStatistics.cs
+++ b/src/Orleans.Core/Statistics/EnvironmentStatistics.cs
@@ -36,30 +36,30 @@ public EnvironmentStatisticsProvider()
}
///
- public EnvironmentStatistics GetEnvironmentStatistics()
- {
- var memoryInfo = GC.GetGCMemoryInfo();
+public EnvironmentStatistics GetEnvironmentStatistics()
+{
+ var memoryInfo = GC.GetGCMemoryInfo();
- var cpuUsage = _eventCounterListener.CpuUsage;
- var memoryUsage = GC.GetTotalMemory(false) + memoryInfo.FragmentedBytes;
+ var cpuUsage = _eventCounterListener.CpuUsage;
+ var memoryUsage = GC.GetTotalMemory(false) + memoryInfo.FragmentedBytes;
- var committedOfLimit = memoryInfo.TotalAvailableMemoryBytes - memoryInfo.TotalCommittedBytes;
- var unusedLoad = memoryInfo.HighMemoryLoadThresholdBytes - memoryInfo.MemoryLoadBytes;
- var systemAvailable = Math.Max(0, Math.Min(committedOfLimit, unusedLoad));
- var processAvailable = memoryInfo.TotalCommittedBytes - memoryInfo.HeapSizeBytes;
- var availableMemory = systemAvailable + processAvailable;
- var maxAvailableMemory = Math.Min(memoryInfo.TotalAvailableMemoryBytes, memoryInfo.HighMemoryLoadThresholdBytes);
+ var committedOfLimit = memoryInfo.TotalAvailableMemoryBytes - memoryInfo.TotalCommittedBytes;
+ var unusedLoad = memoryInfo.HighMemoryLoadThresholdBytes - memoryInfo.MemoryLoadBytes;
+ var systemAvailable = Math.Max(0, Math.Min(committedOfLimit, unusedLoad));
+ var processAvailable = memoryInfo.TotalCommittedBytes - memoryInfo.HeapSizeBytes;
+ var availableMemory = systemAvailable + processAvailable;
+ var maxAvailableMemory = Math.Min(memoryInfo.TotalAvailableMemoryBytes, memoryInfo.HighMemoryLoadThresholdBytes);
- var filteredCpuUsage = _cpuUsageFilter.Filter(cpuUsage);
- var filteredMemoryUsage = (long)_memoryUsageFilter.Filter(memoryUsage);
- var filteredAvailableMemory = (long)_availableMemoryFilter.Filter(availableMemory);
- // no need to filter 'maxAvailableMemory' as it will almost always be a steady value.
+ var filteredCpuUsage = _cpuUsageFilter.Filter(cpuUsage);
+ var filteredMemoryUsage = (long)_memoryUsageFilter.Filter(memoryUsage);
+ var filteredAvailableMemory = (long)_availableMemoryFilter.Filter(availableMemory);
+ // no need to filter 'maxAvailableMemory' as it will almost always be a steady value.
- _availableMemoryBytes = filteredAvailableMemory;
- _maximumAvailableMemoryBytes = maxAvailableMemory;
+ _availableMemoryBytes = filteredAvailableMemory;
+ _maximumAvailableMemoryBytes = maxAvailableMemory;
- return new(filteredCpuUsage, filteredMemoryUsage, filteredAvailableMemory, maxAvailableMemory);
- }
+ return new(filteredCpuUsage, filteredMemoryUsage, filteredAvailableMemory, maxAvailableMemory);
+}
public void Dispose() => _eventCounterListener.Dispose();
diff --git a/src/Orleans.Runtime/Catalog/ActivationCollector.cs b/src/Orleans.Runtime/Catalog/ActivationCollector.cs
index 90ba8287cd..20b5eb8d49 100644
--- a/src/Orleans.Runtime/Catalog/ActivationCollector.cs
+++ b/src/Orleans.Runtime/Catalog/ActivationCollector.cs
@@ -7,7 +7,6 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Orleans.Configuration;
-using Orleans.Internal;
using Orleans.Runtime.Internal;
namespace Orleans.Runtime
@@ -15,11 +14,12 @@ namespace Orleans.Runtime
///
/// Identifies activations that have been idle long enough to be deactivated.
///
- internal class ActivationCollector : IActivationWorkingSetObserver, ILifecycleParticipant
+ internal class ActivationCollector : IActivationWorkingSetObserver, ILifecycleParticipant, IDisposable
{
private readonly TimeSpan quantum;
private readonly TimeSpan shortestAgeLimit;
private readonly ConcurrentDictionary buckets = new();
+ private readonly CancellationTokenSource _shutdownCts = new();
private DateTime nextTicket;
private static readonly List nothing = new(0);
private readonly ILogger logger;
@@ -74,7 +74,7 @@ public int GetNumRecentlyUsed(TimeSpan recencyPeriod)
///
/// The age limit.
/// A representing the work performed.
- public Task CollectActivations(TimeSpan ageLimit) => CollectActivationsImpl(false, ageLimit);
+ public Task CollectActivations(TimeSpan ageLimit, CancellationToken cancellationToken) => CollectActivationsImpl(false, ageLimit, cancellationToken);
///
/// Schedules the provided grain context for collection if it becomes idle for the specified duration.
@@ -213,7 +213,6 @@ public List ScanStale()
{
var now = DateTime.UtcNow;
List condemned = null;
- var reason = GetDeactivationReason();
while (DequeueQuantum(out var activations, now))
{
// At this point, all tickets associated with activations are cancelled and any attempts to reschedule will fail silently.
@@ -225,15 +224,9 @@ public List ScanStale()
activation.CollectionTicket = default;
if (!activation.IsValid)
{
+ // This is not an error scenario because the activation may have become invalid between the time
+ // we captured a snapshot in 'DequeueQuantum' and now. We are not able to observe such changes.
// Do nothing: don't collect, don't reschedule.
- // The activation can't be in Created or Activating, since we only ScheduleCollection after successfull activation.
- // If the activation is already in Deactivating or Invalid state, its already being collected or was collected
- // (both mean a bug, this activation should not be in the collector)
- // So in any state except for Valid we should just not collect and not reschedule.
- logger.LogWarning(
- (int)ErrorCode.Catalog_ActivationCollector_BadState_1,
- "ActivationCollector found an activation in a non Valid state. All activation inside the ActivationCollector should be in Valid state. Activation: {Activation}",
- activation);
}
else if (activation.KeepAliveUntil > now)
{
@@ -248,8 +241,8 @@ public List ScanStale()
else
{
// Atomically set Deactivating state, to disallow any new requests or new timer ticks to be dispatched on this activation.
- activation.Deactivate(reason, cancellationToken: default);
- AddActivationToList(activation, ref condemned);
+ condemned ??= [];
+ condemned.Add(activation);
}
}
}
@@ -267,7 +260,6 @@ public List ScanAll(TimeSpan ageLimit)
{
List condemned = null;
var now = DateTime.UtcNow;
- var reason = GetDeactivationReason();
foreach (var kv in buckets)
{
var bucket = kv.Value;
@@ -294,10 +286,10 @@ public List ScanAll(TimeSpan ageLimit)
{
if (bucket.TryRemove(activation))
{
- // we removed the activation from the collector. it's our responsibility to deactivate it.
- activation.Deactivate(reason, cancellationToken: default);
- AddActivationToList(activation, ref condemned);
+ condemned ??= [];
+ condemned.Add(activation);
}
+
// someone else has already deactivated the activation, so there's nothing to do.
}
else
@@ -319,12 +311,6 @@ private static DeactivationReason GetDeactivationReason()
return reason;
}
- private void AddActivationToList(ICollectibleGrainContext activation, ref List condemned)
- {
- condemned ??= [];
- condemned.Add(activation);
- }
-
private void ThrowIfTicketIsInvalid(DateTime ticket)
{
if (ticket.Ticks == 0) throw new ArgumentException("Empty ticket is not allowed in this context.");
@@ -372,9 +358,9 @@ private void Add(ICollectibleGrainContext item, DateTime ticket)
void IActivationWorkingSetObserver.OnAdded(IActivationWorkingSetMember member)
{
- Interlocked.Increment(ref _activationCount);
if (member is ICollectibleGrainContext activation)
{
+ Interlocked.Increment(ref _activationCount);
if (activation.CollectionTicket == default)
{
ScheduleCollection(activation, activation.CollectionAgeLimit, DateTime.UtcNow);
@@ -410,10 +396,9 @@ void IActivationWorkingSetObserver.OnDeactivating(IActivationWorkingSetMember me
void IActivationWorkingSetObserver.OnDeactivated(IActivationWorkingSetMember member)
{
- Interlocked.Decrement(ref _activationCount);
- if (member is ICollectibleGrainContext activation)
+ if (member is ICollectibleGrainContext activation && TryCancelCollection(activation))
{
- TryCancelCollection(activation);
+ Interlocked.Decrement(ref _activationCount);
}
}
@@ -426,6 +411,7 @@ private Task Start(CancellationToken cancellationToken)
private async Task Stop(CancellationToken cancellationToken)
{
+ using var registration = cancellationToken.Register(() => _shutdownCts.Cancel());
_collectionTimer.Dispose();
if (_collectionLoopTask is Task task)
@@ -439,18 +425,19 @@ void ILifecycleParticipant.Participate(ISiloLifecycle lifecycle)
lifecycle.Subscribe(
nameof(ActivationCollector),
ServiceLifecycleStage.RuntimeServices,
- async cancellation => await Start(cancellation),
- async cancellation => await Stop(cancellation));
+ Start,
+ Stop);
}
private async Task RunActivationCollectionLoop()
{
await Task.CompletedTask.ConfigureAwait(ConfigureAwaitOptions.ForceYielding);
+ var cancellationToken = _shutdownCts.Token;
while (await _collectionTimer.WaitForNextTickAsync())
{
try
{
- await this.CollectActivationsImpl(true);
+ await this.CollectActivationsImpl(true, ageLimit: default, cancellationToken);
}
catch (Exception exception)
{
@@ -459,7 +446,7 @@ private async Task RunActivationCollectionLoop()
}
}
- private async Task CollectActivationsImpl(bool scanStale, TimeSpan ageLimit = default)
+ private async Task CollectActivationsImpl(bool scanStale, TimeSpan ageLimit, CancellationToken cancellationToken)
{
var watch = ValueStopwatch.StartNew();
var number = Interlocked.Increment(ref collectionNumber);
@@ -478,12 +465,10 @@ private async Task CollectActivationsImpl(bool scanStale, TimeSpan ageLimit = de
List list = scanStale ? ScanStale() : ScanAll(ageLimit);
CatalogInstruments.ActivationCollections.Add(1);
- var count = 0;
- if (list != null && list.Count > 0)
+ if (list is { Count: > 0 })
{
- count = list.Count;
if (logger.IsEnabled(LogLevel.Debug)) logger.LogDebug("CollectActivations {Activations}", list.ToStrings(d => d.GrainId.ToString() + d.ActivationId));
- await DeactivateActivationsFromCollector(list);
+ await DeactivateActivationsFromCollector(list, cancellationToken);
}
long memAfter = GC.GetTotalMemory(false) / (1024 * 1024);
@@ -497,31 +482,38 @@ private async Task CollectActivationsImpl(bool scanStale, TimeSpan ageLimit = de
number,
memAfter,
_activationCount,
- count,
+ list?.Count ?? 0,
ToString(),
watch.Elapsed);
}
}
- private async Task DeactivateActivationsFromCollector(List list)
+ private async Task DeactivateActivationsFromCollector(List list, CancellationToken cancellationToken)
{
- var mtcs = new MultiTaskCompletionSource(list.Count);
-
- logger.LogInformation((int)ErrorCode.Catalog_ShutdownActivations_1, "DeactivateActivationsFromCollector: total {Count} to promptly Destroy.", list.Count);
+ logger.LogInformation((int)ErrorCode.Catalog_ShutdownActivations_1, "Deactivating '{Count}' idle activations.", list.Count);
CatalogInstruments.ActivationShutdownViaCollection();
- Action signalCompletion = mtcs.SetOneResult;
var reason = GetDeactivationReason();
- for (var i = 0; i < list.Count; i++)
+
+ var options = new ParallelOptions
{
- var activationData = list[i];
+ // Avoid passing the cancellation token, since we want all of these activations to be deactivated, even if cancellation is triggered.
+ CancellationToken = CancellationToken.None,
+ MaxDegreeOfParallelism = Environment.ProcessorCount * 512
+ };
+ await Parallel.ForEachAsync(list, options, async (activationData, token) =>
+ {
// Continue deactivation when ready.
- activationData.Deactivate(reason);
- activationData.Deactivated.GetAwaiter().OnCompleted(signalCompletion);
- }
+ activationData.Deactivate(reason, cancellationToken);
+ await activationData.Deactivated.ConfigureAwait(false);
+ }).WaitAsync(cancellationToken);
+ }
- await mtcs.Task;
+ public void Dispose()
+ {
+ _collectionTimer.Dispose();
+ _shutdownCts.Dispose();
}
private class Bucket
@@ -568,7 +560,7 @@ public List CancelAll()
item.CollectionTicket = default;
}
- result ??= new List();
+ result ??= [];
result.Add(pair.Value);
}
diff --git a/src/Orleans.Runtime/Catalog/ActivationData.cs b/src/Orleans.Runtime/Catalog/ActivationData.cs
index 5bb3b41027..345f71490d 100644
--- a/src/Orleans.Runtime/Catalog/ActivationData.cs
+++ b/src/Orleans.Runtime/Catalog/ActivationData.cs
@@ -3,7 +3,6 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
-using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Threading;
@@ -607,8 +606,9 @@ public bool DeactivateCore(DeactivationReason reason, CancellationToken cancella
if (state is ActivationState.Creating or ActivationState.Activating or ActivationState.Valid)
{
CancelPendingOperations();
- SetState(ActivationState.Deactivating);
+
_shared.InternalRuntime.ActivationWorkingSet.OnDeactivating(this);
+ SetState(ActivationState.Deactivating);
var cts = CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
cts.CancelAfter(_shared.InternalRuntime.CollectionOptions.Value.DeactivationTimeout);
ScheduleOperation(new Command.Deactivate(cts, state));
@@ -805,9 +805,11 @@ public async ValueTask DisposeAsync()
_extras.IsDisposing = true;
CancelPendingOperations();
+
lock (this)
{
- State = ActivationState.Invalid;
+ _shared.InternalRuntime.ActivationWorkingSet.OnDeactivated(this);
+ SetState(ActivationState.Invalid);
}
DisposeTimers();
@@ -1177,13 +1179,10 @@ async Task ProcessOperationsAsync()
await ActivateAsync(command.RequestContext, command.CancellationToken).SuppressThrowing();
break;
case Command.Deactivate command:
- await FinishDeactivating(command.CancellationToken, command.PreviousState).SuppressThrowing();
+ await FinishDeactivating(command.PreviousState, command.CancellationToken).SuppressThrowing();
break;
case Command.Delay command:
- await Task.Delay(command.Duration, GrainRuntime.TimeProvider, command.CancellationToken);
- break;
- case Command.UnregisterFromCatalog:
- UnregisterMessageTarget();
+ await Task.Delay(command.Duration, GrainRuntime.TimeProvider, command.CancellationToken).SuppressThrowing();
break;
default:
throw new NotSupportedException($"Encountered unknown operation of type {op?.GetType().ToString() ?? "null"} {op}.");
@@ -1462,6 +1461,15 @@ private void RerouteAllQueuedMessages()
return;
}
+ // If deactivation was caused by a transient failure, allow messages to be forwarded.
+ if (DeactivationReason.ReasonCode.IsTransientError())
+ {
+ foreach (var msg in msgs)
+ {
+ msg.ForwardCount = Math.Max(msg.ForwardCount - 1, 0);
+ }
+ }
+
if (_shared.Logger.IsEnabled(LogLevel.Debug))
{
if (ForwardingAddress is { } address)
@@ -1497,7 +1505,6 @@ private async Task ActivateAsync(Dictionary? requestContextData,
{
// A chain of promises that will have to complete in order to complete the activation
// Register with the grain directory and call the Activate method on the new activation.
- var stopwatch = ValueStopwatch.StartNew();
try
{
// Currently, the only grain type that is not registered in the Grain Directory is StatelessWorker.
@@ -1511,6 +1518,11 @@ private async Task ActivateAsync(Dictionary? requestContextData,
{
while (true)
{
+ if (_shared.Logger.IsEnabled(LogLevel.Debug))
+ {
+ _shared.Logger.LogDebug("Registering grain '{Grain}' in activation directory. Previous known registration is '{PreviousRegistration}'.", this, previousRegistration);
+ }
+
var result = await _shared.InternalRuntime.GrainLocator.Register(Address, previousRegistration).WaitAsync(cancellationToken);
if (Address.Matches(result))
{
@@ -1519,20 +1531,22 @@ private async Task ActivateAsync(Dictionary? requestContextData,
}
else if (result?.SiloAddress is { } registeredSilo && registeredSilo.Equals(Address.SiloAddress))
{
+ // Attempt to register this activation again, using the registration of the previous instance of this grain,
+ // which is registered to this silo. That activation must be a defunct predecessor of this activation,
+ // since the catalog only allows one activation of a given grain at a time.
+ // This could occur if the previous activation failed to unregister itself from the grain directory.
+ previousRegistration = result;
+
if (_shared.Logger.IsEnabled(LogLevel.Debug))
{
_shared.Logger.LogDebug(
- "The grain directory has an existing entry pointing to a different activation of this grain on this silo, {PreviousRegistration}."
+ "The grain directory has an existing entry pointing to a different activation of this grain, '{GrainId}', on this silo: '{PreviousRegistration}'."
+ " This may indicate that the previous activation was deactivated but the directory was not successfully updated."
+ " The directory will be updated to point to this activation.",
- previousRegistration);
+ GrainId,
+ result);
}
- // Attempt to register this activation again, using the registration of the previous instance of this grain,
- // which is registered to this silo. That activation must be a defunct predecessor of this activation,
- // since the catalog only allows one activation of a given grain at a time.
- // This could occur if the previous activation failed to unregister itself from the grain directory.
- previousRegistration = result;
continue;
}
else
@@ -1631,14 +1645,13 @@ private async Task ActivateAsync(Dictionary? requestContextData,
lock (this)
{
- if (State == ActivationState.Activating)
+ if (State is ActivationState.Activating)
{
SetState(ActivationState.Valid); // Activate calls on this activation are finished
+ _shared.InternalRuntime.ActivationWorkingSet.OnActivated(this);
}
}
- _shared.InternalRuntime.ActivationWorkingSet.OnActivated(this);
-
if (_shared.Logger.IsEnabled(LogLevel.Debug))
{
_shared.Logger.LogDebug((int)ErrorCode.Catalog_AfterCallingActivate, "Finished activating grain {Grain}", this);
@@ -1676,11 +1689,6 @@ private async Task ActivateAsync(Dictionary? requestContextData,
}
finally
{
- if (cancellationToken.IsCancellationRequested && stopwatch.Elapsed.TotalMilliseconds > 50)
- {
- _shared.Logger.LogInformation("Cancellation requested for activation {Activation} took {ElapsedMilliseconds:0.0}ms.", this, stopwatch.Elapsed.TotalMilliseconds);
- }
-
_workSignal.Signal();
}
}
@@ -1691,10 +1699,8 @@ private async Task ActivateAsync(Dictionary? requestContextData,
///
/// Completes the deactivation process.
///
- /// A cancellation which terminates graceful deactivation when cancelled.
- private async Task FinishDeactivating(CancellationToken cancellationToken, ActivationState previousState)
+ private async Task FinishDeactivating(ActivationState previousState, CancellationToken cancellationToken)
{
- var stopwatch = ValueStopwatch.StartNew();
var migrated = false;
var encounteredError = false;
try
@@ -1707,13 +1713,6 @@ private async Task FinishDeactivating(CancellationToken cancellationToken, Activ
// Stop timers from firing.
DisposeTimers();
- // Note: This call is being made from within Scheduler.Queue wrapper, so we are already executing on worker thread
- if (_shared.Logger.IsEnabled(LogLevel.Debug))
- _shared.Logger.LogDebug(
- (int)ErrorCode.Catalog_BeforeCallingDeactivate,
- "About to call OnDeactivateAsync for '{Activation}'",
- this);
-
// If the grain was valid when deactivation started, call OnDeactivateAsync.
if (previousState == ActivationState.Valid)
{
@@ -1721,6 +1720,12 @@ private async Task FinishDeactivating(CancellationToken cancellationToken, Activ
{
try
{
+ if (_shared.Logger.IsEnabled(LogLevel.Debug))
+ _shared.Logger.LogDebug(
+ (int)ErrorCode.Catalog_BeforeCallingDeactivate,
+ "About to call OnDeactivateAsync for '{Activation}'",
+ this);
+
await grainBase.OnDeactivateAsync(DeactivationReason, cancellationToken).WaitAsync(cancellationToken);
if (_shared.Logger.IsEnabled(LogLevel.Debug))
@@ -1793,8 +1798,9 @@ private async Task FinishDeactivating(CancellationToken cancellationToken, Activ
// If the instance is being deactivated due to a directory failure, we should not unregister it.
var isDirectoryFailure = DeactivationReason.ReasonCode is DeactivationReasonCode.DirectoryFailure;
+ var isShuttingDown = DeactivationReason.ReasonCode is DeactivationReasonCode.ShuttingDown;
- if (!migrated && IsUsingGrainDirectory && !cancellationToken.IsCancellationRequested && !isDirectoryFailure)
+ if (!migrated && IsUsingGrainDirectory && !cancellationToken.IsCancellationRequested && !isDirectoryFailure && !isShuttingDown)
{
// Unregister from directory.
// If the grain was migrated, the new activation will perform a check-and-set on the registration itself.
@@ -1804,9 +1810,17 @@ private async Task FinishDeactivating(CancellationToken cancellationToken, Activ
}
catch (Exception exception)
{
- _shared.Logger.LogError(exception, "Failed to unregister activation '{Activation}' from directory.", this);
+ if (!cancellationToken.IsCancellationRequested)
+ {
+ _shared.Logger.LogError(exception, "Failed to unregister activation '{Activation}' from directory.", this);
+ }
}
}
+ else if (isDirectoryFailure)
+ {
+ // Optimization: forward to the same host to restart activation without needing to invalidate caches.
+ ForwardingAddress ??= Address.SiloAddress;
+ }
}
catch (Exception ex)
{
@@ -1830,7 +1844,7 @@ private async Task FinishDeactivating(CancellationToken cancellationToken, Activ
CatalogInstruments.ActivationShutdownViaCollection();
}
- _shared.InternalRuntime.ActivationWorkingSet.OnDeactivated(this);
+ UnregisterMessageTarget();
try
{
@@ -1841,15 +1855,9 @@ private async Task FinishDeactivating(CancellationToken cancellationToken, Activ
_shared.Logger.LogWarning(exception, "Exception disposing activation '{Activation}'.", this);
}
- UnregisterMessageTarget();
-
// Signal deactivation
GetDeactivationCompletionSource().TrySetResult(true);
_workSignal.Signal();
- if (cancellationToken.IsCancellationRequested && stopwatch.Elapsed.TotalMilliseconds > 50)
- {
- _shared.Logger.LogInformation("Cancellation requested for deactivation {Activation} took {ElapsedMilliseconds:0.0}ms.", this, stopwatch.Elapsed.TotalMilliseconds);
- }
}
private TaskCompletionSource GetDeactivationCompletionSource()
@@ -2049,11 +2057,6 @@ public sealed class Delay(TimeSpan duration) : Command(new())
{
public TimeSpan Duration { get; } = duration;
}
-
- public sealed class UnregisterFromCatalog() : Command(new())
- {
- public static readonly UnregisterFromCatalog Instance = new();
- }
}
internal class ReentrantRequestTracker : Dictionary
diff --git a/src/Orleans.Runtime/Catalog/ActivationDirectory.cs b/src/Orleans.Runtime/Catalog/ActivationDirectory.cs
index 0d3a9d7fd6..bde0450e69 100644
--- a/src/Orleans.Runtime/Catalog/ActivationDirectory.cs
+++ b/src/Orleans.Runtime/Catalog/ActivationDirectory.cs
@@ -35,12 +35,15 @@ public void RecordNewTarget(IGrainContext target)
}
}
- public void RemoveTarget(IGrainContext target)
+ public bool RemoveTarget(IGrainContext target)
{
if (_activations.TryRemove(KeyValuePair.Create(target.GrainId, target)))
{
Interlocked.Decrement(ref _activationsCount);
+ return true;
}
+
+ return false;
}
public IEnumerator> GetEnumerator() => _activations.GetEnumerator();
diff --git a/src/Orleans.Runtime/Catalog/ActivationWorkingSet.cs b/src/Orleans.Runtime/Catalog/ActivationWorkingSet.cs
index 0f36b959e9..b232c58bb0 100644
--- a/src/Orleans.Runtime/Catalog/ActivationWorkingSet.cs
+++ b/src/Orleans.Runtime/Catalog/ActivationWorkingSet.cs
@@ -1,6 +1,7 @@
using System;
using System.Collections.Concurrent;
using System.Collections.Generic;
+using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
@@ -43,6 +44,7 @@ public ActivationWorkingSet(
public void OnActivated(IActivationWorkingSetMember member)
{
+ Debug.Assert(member is not ICollectibleGrainContext collectible || collectible.IsValid);
if (_members.TryAdd(member, new MemberState()))
{
Interlocked.Increment(ref _activeCount);
diff --git a/src/Orleans.Runtime/Catalog/Catalog.cs b/src/Orleans.Runtime/Catalog/Catalog.cs
index d19e51e64b..56b6dedf27 100644
--- a/src/Orleans.Runtime/Catalog/Catalog.cs
+++ b/src/Orleans.Runtime/Catalog/Catalog.cs
@@ -22,7 +22,6 @@ internal sealed class Catalog : SystemTarget, ICatalog
private readonly ActivationDirectory activations;
private readonly IServiceProvider serviceProvider;
private readonly ILogger logger;
- private readonly IOptions collectionOptions;
private readonly GrainContextActivator grainActivator;
public Catalog(
@@ -32,7 +31,6 @@ public Catalog(
ActivationCollector activationCollector,
IServiceProvider serviceProvider,
ILoggerFactory loggerFactory,
- IOptions collectionOptions,
GrainContextActivator grainActivator)
: base(Constants.CatalogType, localSiloDetails.SiloAddress, loggerFactory)
{
@@ -40,7 +38,6 @@ public Catalog(
this.grainDirectoryResolver = grainDirectoryResolver;
this.activations = activationDirectory;
this.serviceProvider = serviceProvider;
- this.collectionOptions = collectionOptions;
this.grainActivator = grainActivator;
this.logger = loggerFactory.CreateLogger();
this.activationCollector = activationCollector;
@@ -70,20 +67,21 @@ public Catalog(
///
public void UnregisterMessageTarget(IGrainContext activation)
{
- if (logger.IsEnabled(LogLevel.Trace))
+ if (activations.RemoveTarget(activation))
{
- logger.LogTrace("Unregistering activation {Activation}", activation.ToString());
- }
+ if (logger.IsEnabled(LogLevel.Trace))
+ {
+ logger.LogTrace("Unregistered activation {Activation}", activation);
+ }
- activations.RemoveTarget(activation);
+ // this should be removed once we've refactored the deactivation code path. For now safe to keep.
+ if (activation is ICollectibleGrainContext collectibleActivation)
+ {
+ activationCollector.TryCancelCollection(collectibleActivation);
+ }
- // this should be removed once we've refactored the deactivation code path. For now safe to keep.
- if (activation is ICollectibleGrainContext collectibleActivation)
- {
- activationCollector.TryCancelCollection(collectibleActivation);
+ CatalogInstruments.ActivationsDestroyed.Add(1);
}
-
- CatalogInstruments.ActivationsDestroyed.Add(1);
}
///
@@ -161,7 +159,13 @@ public IGrainContext GetOrCreateActivation(
if (!SiloStatusOracle.CurrentStatus.IsTerminating())
{
- var address = GrainAddress.GetAddress(Silo, grainId, ActivationId.NewId());
+ var address = new GrainAddress
+ {
+ SiloAddress = Silo,
+ GrainId = grainId,
+ ActivationId = ActivationId.NewId(),
+ MembershipVersion = MembershipVersion.MinValue,
+ };
result = this.grainActivator.CreateInstance(address);
activations.RecordNewTarget(result);
}
@@ -182,17 +186,17 @@ public IGrainContext GetOrCreateActivation(
}
// Initialize the new activation asynchronously.
- var cancellation = new CancellationTokenSource(collectionOptions.Value.ActivationTimeout);
- result.Activate(requestContextData, cancellation.Token);
+ result.Activate(requestContextData);
return result;
[MethodImpl(MethodImplOptions.NoInlining)]
static IGrainContext UnableToCreateActivation(Catalog self, GrainId grainId)
{
// Did not find and did not start placing new
+ var isTerminating = self.SiloStatusOracle.CurrentStatus.IsTerminating();
if (self.logger.IsEnabled(LogLevel.Debug))
{
- if (self.SiloStatusOracle.CurrentStatus.IsTerminating())
+ if (isTerminating)
{
self.logger.LogDebug((int)ErrorCode.CatalogNonExistingActivation2, "Unable to create activation for grain {GrainId} because this silo is terminating", grainId);
}
@@ -206,14 +210,17 @@ static IGrainContext UnableToCreateActivation(Catalog self, GrainId grainId)
var grainLocator = self.serviceProvider.GetRequiredService();
grainLocator.InvalidateCache(grainId);
+ if (!isTerminating)
+ {
+ // Unregister the target activation so we don't keep getting spurious messages.
+ // The time delay (one minute, as of this writing) is to handle the unlikely but possible race where
+ // this request snuck ahead of another request, with new placement requested, for the same activation.
+ // If the activation registration request from the new placement somehow sneaks ahead of this deregistration,
+ // we want to make sure that we don't unregister the activation we just created.
+ var address = new GrainAddress { SiloAddress = self.Silo, GrainId = grainId };
+ _ = self.UnregisterNonExistentActivation(address);
+ }
- // Unregister the target activation so we don't keep getting spurious messages.
- // The time delay (one minute, as of this writing) is to handle the unlikely but possible race where
- // this request snuck ahead of another request, with new placement requested, for the same activation.
- // If the activation registration request from the new placement somehow sneaks ahead of this deregistration,
- // we want to make sure that we don't unregister the activation we just created.
- var address = new GrainAddress { SiloAddress = self.Silo, GrainId = grainId };
- _ = self.UnregisterNonExistentActivation(address);
return null;
}
}
@@ -245,7 +252,7 @@ private bool TryGetGrainContext(GrainId grainId, out IGrainContext data)
}
///
- /// Gracefully deletes activations, putting it into a shutdown state to
+ /// Gracefully deactivates activations, waiting for each of them to
/// complete and commit outstanding transactions before deleting it.
/// To be called not from within Activation context, so can be awaited.
///
@@ -254,26 +261,21 @@ internal async Task DeactivateActivations(DeactivationReason reason, List(list.Count);
- foreach (var activation in list)
+ var options = new ParallelOptions
{
- activation.Deactivate(reason, cancellationToken);
- tasks.Add(activation.Deactivated);
- }
-
- await Task.WhenAll(tasks);
- }
-
- internal void StartDeactivatingActivations(DeactivationReason reason, List list, CancellationToken cancellationToken)
- {
- if (list == null || list.Count == 0) return;
-
- if (logger.IsEnabled(LogLevel.Debug)) logger.LogDebug("DeactivateActivations: {Count} activations.", list.Count);
-
- foreach (var activation in list)
+ CancellationToken = CancellationToken.None,
+ MaxDegreeOfParallelism = Environment.ProcessorCount * 512
+ };
+ await Parallel.ForEachAsync(list, options, (activation, _) =>
{
+ if (activation.GrainId.Type.IsSystemTarget())
+ {
+ return ValueTask.CompletedTask;
+ }
+
activation.Deactivate(reason, cancellationToken);
- }
+ return new (activation.Deactivated);
+ }).WaitAsync(cancellationToken);
}
public async Task DeactivateAllActivations(CancellationToken cancellationToken)
@@ -283,14 +285,24 @@ public async Task DeactivateAllActivations(CancellationToken cancellationToken)
logger.LogDebug((int)ErrorCode.Catalog_DeactivateAllActivations, "DeactivateAllActivations.");
}
- var activationsToShutdown = new List();
- foreach (var pair in activations)
+ if (logger.IsEnabled(LogLevel.Debug)) logger.LogDebug("DeactivateActivations: {Count} activations.", activations.Count);
+ var reason = new DeactivationReason(DeactivationReasonCode.ShuttingDown, "This process is terminating.");
+ var options = new ParallelOptions
{
- activationsToShutdown.Add(pair.Value);
- }
+ CancellationToken = CancellationToken.None,
+ MaxDegreeOfParallelism = Environment.ProcessorCount * 512
+ };
+ await Parallel.ForEachAsync(activations, options, (kv, _) =>
+ {
+ if (kv.Key.IsSystemTarget())
+ {
+ return ValueTask.CompletedTask;
+ }
- var reason = new DeactivationReason(DeactivationReasonCode.ShuttingDown, "This process is terminating.");
- await DeactivateActivations(reason, activationsToShutdown, cancellationToken).WaitAsync(cancellationToken);
+ var activation = kv.Value;
+ activation.Deactivate(reason, cancellationToken);
+ return new (activation.Deactivated);
+ }).WaitAsync(cancellationToken);
}
public SiloStatus LocalSiloStatus
@@ -301,20 +313,20 @@ public SiloStatus LocalSiloStatus
}
}
- public Task DeleteActivations(List addresses, DeactivationReasonCode reasonCode, string reasonText)
+ public async Task DeleteActivations(List addresses, DeactivationReasonCode reasonCode, string reasonText)
{
var tasks = new List(addresses.Count);
var deactivationReason = new DeactivationReason(reasonCode, reasonText);
- foreach (var activationAddress in addresses)
+ await Parallel.ForEachAsync(addresses, (activationAddress, cancellationToken) =>
{
if (TryGetGrainContext(activationAddress.GrainId, out var grainContext))
{
grainContext.Deactivate(deactivationReason);
- tasks.Add(grainContext.Deactivated);
+ return new ValueTask(grainContext.Deactivated);
}
- }
- return Task.WhenAll(tasks);
+ return ValueTask.CompletedTask;
+ });
}
// TODO move this logic in the LocalGrainDirectory
@@ -330,7 +342,7 @@ internal void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
if (!status.IsTerminating()) return;
if (status == SiloStatus.Dead)
{
- this.RuntimeClient.BreakOutstandingMessagesToDeadSilo(updatedSilo);
+ this.RuntimeClient.BreakOutstandingMessagesToSilo(updatedSilo);
}
var activationsToShutdown = new List();
@@ -347,7 +359,7 @@ internal void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
var activationData = activation.Value;
var placementStrategy = activationData.GetComponent();
var isUsingGrainDirectory = placementStrategy is { IsUsingGrainDirectory: true };
- if (!isUsingGrainDirectory || !grainDirectoryResolver.IsUsingDhtDirectory(activationData.GrainId.Type)) continue;
+ if (!isUsingGrainDirectory || !grainDirectoryResolver.IsUsingDefaultDirectory(activationData.GrainId.Type)) continue;
if (!updatedSilo.Equals(directory.GetPrimaryForGrain(activationData.GrainId))) continue;
activationsToShutdown.Add(activationData);
@@ -381,6 +393,18 @@ internal void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
StartDeactivatingActivations(reason, activationsToShutdown, CancellationToken.None);
}
}
+
+ void StartDeactivatingActivations(DeactivationReason reason, List list, CancellationToken cancellationToken)
+ {
+ if (list == null || list.Count == 0) return;
+
+ if (logger.IsEnabled(LogLevel.Debug)) logger.LogDebug("DeactivateActivations: {Count} activations.", list.Count);
+
+ foreach (var activation in list)
+ {
+ activation.Deactivate(reason, cancellationToken);
+ }
+ }
}
}
}
diff --git a/src/Orleans.Core/SystemTargetInterfaces/ICatalog.cs b/src/Orleans.Runtime/Catalog/ICatalog.cs
similarity index 99%
rename from src/Orleans.Core/SystemTargetInterfaces/ICatalog.cs
rename to src/Orleans.Runtime/Catalog/ICatalog.cs
index b8fcbc1bc9..fd716c27b7 100644
--- a/src/Orleans.Core/SystemTargetInterfaces/ICatalog.cs
+++ b/src/Orleans.Runtime/Catalog/ICatalog.cs
@@ -1,7 +1,6 @@
using System.Collections.Generic;
using System.Threading.Tasks;
-
namespace Orleans.Runtime
{
///
diff --git a/src/Orleans.Runtime/Core/InsideRuntimeClient.cs b/src/Orleans.Runtime/Core/InsideRuntimeClient.cs
index 922fe223d2..d3069636fc 100644
--- a/src/Orleans.Runtime/Core/InsideRuntimeClient.cs
+++ b/src/Orleans.Runtime/Core/InsideRuntimeClient.cs
@@ -506,7 +506,7 @@ private Task OnRuntimeInitializeStart(CancellationToken tc)
return Task.CompletedTask;
}
- public void BreakOutstandingMessagesToDeadSilo(SiloAddress deadSilo)
+ public void BreakOutstandingMessagesToSilo(SiloAddress deadSilo)
{
foreach (var callback in callbacks)
{
diff --git a/src/Orleans.Runtime/GrainDirectory/DirectoryMembershipService.cs b/src/Orleans.Runtime/GrainDirectory/DirectoryMembershipService.cs
new file mode 100644
index 0000000000..cdf3585dc6
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/DirectoryMembershipService.cs
@@ -0,0 +1,115 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.Logging;
+using Orleans.Internal;
+using Orleans.Runtime.Internal;
+using Orleans.Runtime.Utilities;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+/// <summary>
+/// Converts each <see cref="ClusterMembershipSnapshot"/> published by <see cref="ClusterMembershipService"/>
+/// into a <see cref="DirectoryMembershipSnapshot"/> and republishes it through <see cref="ViewUpdates"/>.
+/// </summary>
+internal sealed partial class DirectoryMembershipService : IAsyncDisposable
+{
+    private readonly IInternalGrainFactory _grainFactory;
+    private readonly ILogger<DirectoryMembershipService> _logger;
+    private readonly CancellationTokenSource _shutdownCts = new();
+    private readonly Task _runTask;
+    private readonly AsyncEnumerable<DirectoryMembershipSnapshot> _viewUpdates;
+
+    /// <summary>Gets the most recently published directory membership view.</summary>
+    public DirectoryMembershipSnapshot CurrentView { get; private set; } = DirectoryMembershipSnapshot.Default;
+
+    /// <summary>Gets the stream of directory membership views.</summary>
+    public IAsyncEnumerable<DirectoryMembershipSnapshot> ViewUpdates => _viewUpdates;
+
+    /// <summary>Gets the cluster membership service which this service observes.</summary>
+    public ClusterMembershipService ClusterMembershipService { get; }
+
+    /// <summary>
+    /// Requests a cluster membership refresh and waits until a view with a version of at least
+    /// <paramref name="version"/> is observed.
+    /// </summary>
+    /// <param name="version">The minimum membership version to wait for.</param>
+    /// <param name="cancellationToken">Cancels the refresh request and the wait.</param>
+    /// <returns>The value of <see cref="CurrentView"/> once the wait completes.</returns>
+    public async ValueTask<DirectoryMembershipSnapshot> RefreshViewAsync(MembershipVersion version, CancellationToken cancellationToken)
+    {
+        // The refresh task is intentionally not awaited; progress is detected through the view updates below.
+        _ = ClusterMembershipService.Refresh(version, cancellationToken);
+        if (CurrentView.Version <= version)
+        {
+            await foreach (var view in _viewUpdates.WithCancellation(cancellationToken))
+            {
+                if (view.Version >= version)
+                {
+                    break;
+                }
+            }
+        }
+
+        return CurrentView;
+    }
+
+    /// <summary>
+    /// Initializes the service and starts the background loop which processes membership updates.
+    /// </summary>
+    public DirectoryMembershipService(ClusterMembershipService clusterMembershipService, IInternalGrainFactory grainFactory, ILogger<DirectoryMembershipService> logger)
+    {
+        _viewUpdates = new(
+            DirectoryMembershipSnapshot.Default,
+            (previous, proposed) => proposed.Version >= previous.Version,
+            update => CurrentView = update);
+        ClusterMembershipService = clusterMembershipService;
+        _grainFactory = grainFactory;
+        _logger = logger;
+        using var _ = new ExecutionContextSuppressor();
+        _runTask = Task.Run(ProcessMembershipUpdates);
+    }
+
+    /// <summary>
+    /// Publishes a directory view for every cluster membership update until shutdown is requested.
+    /// </summary>
+    private async Task ProcessMembershipUpdates()
+    {
+        try
+        {
+            while (!_shutdownCts.IsCancellationRequested)
+            {
+                try
+                {
+                    await foreach (var update in ClusterMembershipService.MembershipUpdates.WithCancellation(_shutdownCts.Token))
+                    {
+                        var view = new DirectoryMembershipSnapshot(update, _grainFactory);
+                        _viewUpdates.Publish(view);
+                    }
+                }
+                catch (Exception exception)
+                {
+                    // Errors are logged and the enumeration is restarted, unless shutdown is in progress.
+                    if (!_shutdownCts.IsCancellationRequested)
+                    {
+                        _logger.LogError(exception, "Error processing membership updates.");
+                    }
+                }
+            }
+        }
+        finally
+        {
+            _viewUpdates.Dispose();
+        }
+    }
+
+    /// <summary>Signals shutdown and waits for the background loop to finish.</summary>
+    public async ValueTask DisposeAsync()
+    {
+        _shutdownCts.Cancel();
+        await _runTask.SuppressThrowing();
+    }
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/DirectoryMembershipSnapshot.cs b/src/Orleans.Runtime/GrainDirectory/DirectoryMembershipSnapshot.cs
new file mode 100644
index 0000000000..62e80a3179
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/DirectoryMembershipSnapshot.cs
@@ -0,0 +1,362 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using Microsoft.CodeAnalysis;
+using Orleans.Configuration;
+using Orleans.Runtime.Utilities;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+/// <summary>
+/// A view of grain directory membership: the sorted set of active silos in a
+/// <see cref="ClusterMembershipSnapshot"/> and the division of the hash ring among their partitions.
+/// </summary>
+internal sealed class DirectoryMembershipSnapshot
+{
+    /// <summary>The number of ring partitions which each active silo contributes.</summary>
+    internal const int PartitionsPerSilo = ConsistentRingOptions.DEFAULT_NUM_VIRTUAL_RING_BUCKETS;
+    // Ring boundaries ordered by start hash; boundaries with duplicate start hashes are removed.
+    private readonly ImmutableArray<(uint Start, int MemberIndex, int PartitionIndex)> _ringBoundaries;
+    // Lazily populated by GetMemberRanges; indexed by member index.
+    private readonly RingRangeCollection[] _rangesByMember;
+    // Partition references, indexed by member index and then by partition index.
+    private readonly ImmutableArray<ImmutableArray<IGrainDirectoryPartition>> _partitionsByMember;
+    // Ranges owned by each partition, indexed by member index and then by partition index.
+    private readonly ImmutableArray<ImmutableArray<RingRange>> _rangesByMemberPartition;
+
+    /// <summary>
+    /// Creates a view of <paramref name="snapshot"/> whose ring boundaries are each silo's uniform hash codes.
+    /// </summary>
+    public DirectoryMembershipSnapshot(ClusterMembershipSnapshot snapshot, IInternalGrainFactory grainFactory) : this(snapshot, grainFactory, static (silo, count) => silo.GetUniformHashCodes(count))
+    {
+    }
+
+    /// <summary>
+    /// Creates a view of <paramref name="snapshot"/> using the provided ring boundary generator.
+    /// </summary>
+    /// <param name="snapshot">The cluster membership snapshot; only members with status <see cref="SiloStatus.Active"/> are included.</param>
+    /// <param name="grainFactory">Used to create partition references. A null factory is tolerated (see <see cref="Default"/>) and yields null references.</param>
+    /// <param name="getRingBoundaries">Given a silo and a count, returns the ring positions of that silo's partitions.</param>
+    internal DirectoryMembershipSnapshot(ClusterMembershipSnapshot snapshot, IInternalGrainFactory grainFactory, Func<SiloAddress, int, IEnumerable<uint>> getRingBoundaries)
+    {
+        var sortedActiveMembers = ImmutableArray.CreateBuilder<SiloAddress>(snapshot.Members.Count(static m => m.Value.Status == SiloStatus.Active));
+        foreach (var member in snapshot.Members)
+        {
+            // Only active members are part of directory membership.
+            if (member.Value.Status == SiloStatus.Active)
+            {
+                sortedActiveMembers.Add(member.Key);
+            }
+        }
+
+        sortedActiveMembers.Sort(static (left, right) => left.CompareTo(right));
+        var hashIndexPairs = ImmutableArray.CreateBuilder<(uint Hash, int MemberIndex, int PartitionIndex)>(PartitionsPerSilo * sortedActiveMembers.Count);
+        var memberPartitions = ImmutableArray.CreateBuilder<ImmutableArray<IGrainDirectoryPartition>>();
+        for (var memberIndex = 0; memberIndex < sortedActiveMembers.Count; memberIndex++)
+        {
+            var activeMember = sortedActiveMembers[memberIndex];
+            var hashCodes = getRingBoundaries(activeMember, PartitionsPerSilo).ToList();
+            hashCodes.Sort();
+            Debug.Assert(hashCodes.Count == PartitionsPerSilo);
+            var partitionReferences = ImmutableArray.CreateBuilder<IGrainDirectoryPartition>(PartitionsPerSilo);
+            for (var partitionIndex = 0; partitionIndex < hashCodes.Count; partitionIndex++)
+            {
+                var hashCode = hashCodes[partitionIndex];
+                hashIndexPairs.Add((hashCode, memberIndex, partitionIndex));
+                partitionReferences.Add(grainFactory?.GetSystemTarget<IGrainDirectoryPartition>(GrainDirectoryReplica.CreateGrainId(activeMember, partitionIndex).GrainId)!);
+            }
+
+            memberPartitions.Add(partitionReferences.ToImmutable());
+        }
+
+        _partitionsByMember = memberPartitions.ToImmutable();
+
+        hashIndexPairs.Sort(static (left, right) =>
+        {
+            var hashCompare = left.Hash.CompareTo(right.Hash);
+            if (hashCompare != 0)
+            {
+                return hashCompare;
+            }
+
+            var partitionCompare = left.PartitionIndex.CompareTo(right.PartitionIndex);
+            if (partitionCompare != 0)
+            {
+                return partitionCompare;
+            }
+
+            return left.MemberIndex.CompareTo(right.MemberIndex);
+        });
+
+        // Assign a range to every boundary, in ring order. Where several boundaries share a start hash, the first of
+        // them owns the range up to the next distinct hash and the subsequent ones are empty. This is the same
+        // boundary which is retained in _ringBoundaries below, so GetRange agrees with TryGetOwner and RangeOwners.
+        Dictionary<int, ImmutableArray<RingRange>.Builder> rangesByMemberPartitionBuilders = [];
+        for (var i = 0; i < hashIndexPairs.Count; i++)
+        {
+            var (entryStart, memberIndex, _) = hashIndexPairs[i];
+            ref var builder = ref CollectionsMarshal.GetValueRefOrAddDefault(rangesByMemberPartitionBuilders, memberIndex, out _);
+            builder ??= ImmutableArray.CreateBuilder<RingRange>(PartitionsPerSilo);
+
+            RingRange range;
+            if (i > 0 && hashIndexPairs[i - 1].Hash == entryStart)
+            {
+                // A preceding boundary has the same start hash and owns the range.
+                range = RingRange.Empty;
+            }
+            else
+            {
+                // Skip over boundaries which collide with this one to find the end of the range.
+                var next = i + 1;
+                while (next < hashIndexPairs.Count && hashIndexPairs[next].Hash == entryStart)
+                {
+                    next++;
+                }
+
+                var nextStart = hashIndexPairs[next % hashIndexPairs.Count].Hash;
+                range = nextStart == entryStart ? RingRange.Full : RingRange.Create(entryStart, nextStart);
+            }
+
+            builder.Add(range);
+        }
+
+        var rangesByMemberPartition = ImmutableArray.CreateBuilder<ImmutableArray<RingRange>>(sortedActiveMembers.Count);
+        for (var i = 0; i < sortedActiveMembers.Count; i++)
+        {
+            rangesByMemberPartition.Add(rangesByMemberPartitionBuilders[i].ToImmutable());
+        }
+
+        _rangesByMemberPartition = rangesByMemberPartition.ToImmutable();
+
+        // Remove boundaries which repeat the start hash of the preceding boundary; their ranges are empty.
+        if (hashIndexPairs.Count > 1)
+        {
+            for (var i = 1; i < hashIndexPairs.Count;)
+            {
+                if (hashIndexPairs[i].Hash == hashIndexPairs[i - 1].Hash)
+                {
+                    hashIndexPairs.RemoveAt(i);
+                }
+                else
+                {
+                    i++;
+                }
+            }
+        }
+
+        _ringBoundaries = hashIndexPairs.ToImmutable();
+
+        Members = sortedActiveMembers.ToImmutable();
+
+        _rangesByMember = new RingRangeCollection[Members.Length];
+        ClusterMembershipSnapshot = snapshot;
+    }
+
+    /// <summary>Gets a view with no members at <see cref="MembershipVersion.MinValue"/>.</summary>
+    public static DirectoryMembershipSnapshot Default { get; } = new DirectoryMembershipSnapshot(
+        new ClusterMembershipSnapshot(ImmutableDictionary<SiloAddress, ClusterMember>.Empty, MembershipVersion.MinValue), null!);
+
+    /// <summary>Gets the version of the underlying cluster membership snapshot.</summary>
+    public MembershipVersion Version => ClusterMembershipSnapshot.Version;
+
+    /// <summary>Gets the active silos, sorted using <see cref="SiloAddress"/> comparison.</summary>
+    public ImmutableArray<SiloAddress> Members { get; }
+
+    /// <summary>
+    /// Gets the range owned by the specified partition of the specified silo, or <see cref="RingRange.Empty"/>
+    /// if the silo is not a member of this view.
+    /// </summary>
+    /// <exception cref="ArgumentOutOfRangeException"><paramref name="partitionIndex"/> is negative or not less than <see cref="PartitionsPerSilo"/>.</exception>
+    public RingRange GetRange(SiloAddress address, int partitionIndex)
+    {
+        ArgumentOutOfRangeException.ThrowIfLessThan(partitionIndex, 0);
+        ArgumentOutOfRangeException.ThrowIfGreaterThan(partitionIndex, PartitionsPerSilo - 1);
+
+        var memberIndex = TryGetMemberIndex(address);
+        if (memberIndex < 0)
+        {
+            return RingRange.Empty;
+        }
+
+        var ranges = GetMemberRangesByPartition(memberIndex);
+        if (partitionIndex >= ranges.Length)
+        {
+            return RingRange.Empty;
+        }
+
+        return ranges[partitionIndex];
+    }
+
+    /// <summary>Gets all ranges owned by the specified silo, or an empty collection if it is not a member of this view.</summary>
+    public RingRangeCollection GetMemberRanges(SiloAddress address)
+    {
+        var memberIndex = TryGetMemberIndex(address);
+
+        if (memberIndex < 0)
+        {
+            return RingRangeCollection.Empty;
+        }
+
+        var range = _rangesByMember[memberIndex];
+        if (range.IsDefault)
+        {
+            range = _rangesByMember[memberIndex] = RingRangeCollection.Create(GetMemberRangesByPartition(memberIndex));
+        }
+
+        return range;
+    }
+
+    /// <summary>Gets the per-partition ranges of the specified silo, or an empty array if it is not a member of this view.</summary>
+    public ImmutableArray<RingRange> GetMemberRangesByPartition(SiloAddress address)
+    {
+        var memberIndex = TryGetMemberIndex(address);
+
+        if (memberIndex < 0)
+        {
+            return [];
+        }
+
+        return GetMemberRangesByPartition(memberIndex);
+    }
+
+    private ImmutableArray<RingRange> GetMemberRangesByPartition(int memberIndex)
+    {
+        ArgumentOutOfRangeException.ThrowIfLessThan(memberIndex, 0);
+        ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(memberIndex, _rangesByMemberPartition.Length);
+        return _rangesByMemberPartition[memberIndex];
+    }
+
+    /// <summary>Gets a read-only list of every ring range together with the member and partition index which own it.</summary>
+    public RangeCollection RangeOwners => new(this);
+
+    /// <summary>Gets the cluster membership snapshot from which this view was created.</summary>
+    public ClusterMembershipSnapshot ClusterMembershipSnapshot { get; }
+
+    // Gets the range which starts at the specified boundary, along with the owning member and partition indices.
+    private (RingRange Range, int MemberIndex, int PartitionIndex) GetRangeInfo(int index)
+    {
+        ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(index, _ringBoundaries.Length);
+        ArgumentOutOfRangeException.ThrowIfLessThan(index, 0);
+
+        var range = GetRangeCore(index);
+        var boundary = _ringBoundaries[index];
+        return (range, boundary.MemberIndex, boundary.PartitionIndex);
+    }
+
+    // Computes the range from the specified boundary to the next boundary, wrapping around at the end of the list.
+    private RingRange GetRangeCore(int index)
+    {
+        ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(index, _ringBoundaries.Length);
+        ArgumentOutOfRangeException.ThrowIfLessThan(index, 0);
+
+        var (entryStart, _, _) = _ringBoundaries[index];
+        var (nextStart, _, _) = _ringBoundaries[(index + 1) % _ringBoundaries.Length];
+        if (entryStart == nextStart)
+        {
+            // The next boundary has the same start hash as this one.
+            if (_ringBoundaries.Length == 1)
+            {
+                return RingRange.Full;
+            }
+            else
+            {
+                // Handle hash collisions by making subsequent adjacent ranges empty.
+                return RingRange.Empty;
+            }
+        }
+
+        return RingRange.Create(entryStart, nextStart);
+    }
+
+    // Finds the index of the address within Members; callers treat a negative result as "not a member".
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    private int TryGetMemberIndex(SiloAddress? address)
+    {
+        if (address is null)
+        {
+            return -1;
+        }
+
+        return SearchAlgorithms.BinarySearch(
+            Members.Length,
+            (this, address),
+            static (index, state) =>
+            {
+                var (snapshot, address) = state;
+                var candidate = snapshot.Members[index];
+                return candidate.CompareTo(address);
+            });
+    }
+
+    /// <summary>Finds the silo and partition which own <paramref name="grainId"/>, based on its uniform hash code.</summary>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public bool TryGetOwner(GrainId grainId, [NotNullWhen(true)] out SiloAddress? owner, [NotNullWhen(true)] out IGrainDirectoryPartition? partitionReference) => TryGetOwner(grainId.GetUniformHashCode(), out owner, out partitionReference);
+
+    /// <summary>
+    /// Finds the silo and partition whose range contains <paramref name="hashCode"/>.
+    /// </summary>
+    /// <returns><see langword="true"/> if an owner was found; <see langword="false"/> if no range matched, which is expected only when there are no members.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public bool TryGetOwner(uint hashCode, [NotNullWhen(true)] out SiloAddress? owner, [NotNullWhen(true)] out IGrainDirectoryPartition? partitionReference)
+    {
+        var index = SearchAlgorithms.RingRangeBinarySearch(
+            _ringBoundaries.Length,
+            this,
+            static (collection, index) => collection.GetRangeCore(index),
+            hashCode);
+        if (index >= 0)
+        {
+            var (_, memberIndex, partitionIndex) = _ringBoundaries[index];
+            owner = Members[memberIndex];
+            partitionReference = _partitionsByMember[memberIndex][partitionIndex];
+            return true;
+        }
+
+        Debug.Assert(Members.Length == 0);
+        owner = null;
+        partitionReference = null;
+        return false;
+    }
+
+    /// <summary>A read-only list view over the ring ranges of a <see cref="DirectoryMembershipSnapshot"/>.</summary>
+    public readonly struct RangeCollection(DirectoryMembershipSnapshot snapshot) : IReadOnlyList<(RingRange Range, int MemberIndex, int PartitionIndex)>
+    {
+        public int Count => snapshot._ringBoundaries.Length;
+
+        public (RingRange Range, int MemberIndex, int PartitionIndex) this[int index] => snapshot.GetRangeInfo(index);
+
+        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+        IEnumerator<(RingRange Range, int MemberIndex, int PartitionIndex)> IEnumerable<(RingRange Range, int MemberIndex, int PartitionIndex)>.GetEnumerator() => GetEnumerator();
+        public RangeCollectionEnumerator GetEnumerator() => new(snapshot);
+
+        /// <summary>Enumerates the ring ranges of a snapshot in boundary order.</summary>
+        public struct RangeCollectionEnumerator(DirectoryMembershipSnapshot snapshot) : IEnumerator<(RingRange Range, int MemberIndex, int PartitionIndex)>
+        {
+            private int _index = 0;
+            // MoveNext post-increments _index, so the current element is at _index - 1.
+            public readonly (RingRange Range, int MemberIndex, int PartitionIndex) Current => snapshot.GetRangeInfo(_index - 1);
+            readonly (RingRange Range, int MemberIndex, int PartitionIndex) IEnumerator<(RingRange Range, int MemberIndex, int PartitionIndex)>.Current => Current;
+            readonly object IEnumerator.Current => Current;
+
+            public void Dispose() => _index = int.MaxValue;
+            public bool MoveNext()
+            {
+                if (_index >= 0 && _index++ < snapshot._ringBoundaries.Length)
+                {
+                    return true;
+                }
+
+                return false;
+            }
+
+            public void Reset() => _index = 0;
+        }
+    }
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/DirectoryResult.cs b/src/Orleans.Runtime/GrainDirectory/DirectoryResult.cs
new file mode 100644
index 0000000000..13489730e0
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/DirectoryResult.cs
@@ -0,0 +1,46 @@
+using System.Diagnostics.CodeAnalysis;
+
+#nullable enable
+namespace Orleans.Runtime;
+
+/// <summary>Factory methods for <see cref="DirectoryResult{T}"/>.</summary>
+internal static class DirectoryResult
+{
+    /// <summary>Creates a result which carries <paramref name="result"/> and is associated with <paramref name="version"/>.</summary>
+    public static DirectoryResult<T> FromResult<T>(T result, MembershipVersion version) => new DirectoryResult<T>(result, version);
+
+    /// <summary>Creates a result which carries the default value of <typeparamref name="T"/> and is associated with <paramref name="version"/>.</summary>
+    public static DirectoryResult<T> RefreshRequired<T>(MembershipVersion version) => new DirectoryResult<T>(default, version);
+}
+
+/// <summary>
+/// A value paired with a membership version; the value is only handed out to callers which present the same version.
+/// </summary>
+/// <typeparam name="T">The type of the carried value.</typeparam>
+[GenerateSerializer, Alias("DirectoryResult`1"), Immutable]
+internal readonly struct DirectoryResult<T>(T? result, MembershipVersion version)
+{
+    [Id(0)]
+    private readonly T? _result = result;
+
+    [Id(1)]
+    public readonly MembershipVersion Version = version;
+
+    /// <summary>
+    /// Gets the carried value if <paramref name="version"/> equals <see cref="Version"/>.
+    /// </summary>
+    /// <param name="version">The membership version which the caller expects.</param>
+    /// <param name="result">The carried value when the versions match; otherwise the default value.</param>
+    /// <returns><see langword="true"/> if the versions match; otherwise <see langword="false"/>.</returns>
+    public bool TryGetResult(MembershipVersion version, [NotNullWhen(true)] out T? result)
+    {
+        if (Version == version)
+        {
+            result = _result!;
+            return true;
+        }
+
+        result = default;
+        return false;
+    }
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/DistributedGrainDirectory.cs b/src/Orleans.Runtime/GrainDirectory/DistributedGrainDirectory.cs
new file mode 100644
index 0000000000..3d2612946c
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/DistributedGrainDirectory.cs
@@ -0,0 +1,381 @@
+using System;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Linq;
+using System.Runtime.CompilerServices;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using Orleans.Concurrency;
+using Orleans.GrainDirectory;
+using Orleans.Internal;
+using Orleans.Runtime.Internal;
+using Orleans.Runtime.Scheduler;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+internal sealed partial class DistributedGrainDirectory : SystemTarget, IGrainDirectory, IGrainDirectoryClient, ILifecycleParticipant, DistributedGrainDirectory.ITestHooks
+{
+ private readonly DirectoryMembershipService _membershipService; // Supplies directory membership views (CurrentView, ViewUpdates, RefreshViewAsync).
+ private readonly ILogger _logger;
+ private readonly IServiceProvider _serviceProvider; // Used to resolve runtime services on demand rather than at construction.
+ private readonly ImmutableArray _partitions; // The partitions hosted by this silo; built once in the constructor.
+ private readonly CancellationTokenSource _stoppedCts = new(); // Canceled when the RuntimeInitialize lifecycle stage stops.
+
+ internal CancellationToken OnStoppedToken => _stoppedCts.Token;
+ internal ClusterMembershipSnapshot ClusterMembershipSnapshot => _membershipService.CurrentView.ClusterMembershipSnapshot;
+
+ // The recovery membership value is used to avoid a race between concurrent registration & recovery operations which could lead to lost registrations.
+ // This could occur when a new activation is created and begins registering itself with a host which crashes. Concurrently, the new owner initiates
+ // recovery and asks all silos for their activations. When this silo processes this request, it will have the activation in its internal
+ // 'ActivationDirectory' even though these activations may not yet have completed registration. Therefore, multiple silos may return an entry for the same
+ // grain. By ensuring that any registration occurred at a version at least as high as the recovery version, we avoid this issue. This could be made more
+ // precise by also tracking the sets of ranges which need to be recovered, but that complicates things somewhat since it would require tracking the ranges
+ // for each recovery version.
+ private long _recoveryMembershipVersion;
+ private Task _runTask = Task.CompletedTask; // The membership-processing loop; assigned when the RuntimeInitialize stage starts.
+
+ public DistributedGrainDirectory( // Creates the directory system target and its fixed set of local partitions.
+ DirectoryMembershipService membershipService,
+ ILogger logger,
+ ILocalSiloDetails localSiloDetails,
+ ILoggerFactory loggerFactory,
+ IServiceProvider serviceProvider,
+ IInternalGrainFactory grainFactory) : base(Constants.GrainDirectory, localSiloDetails.SiloAddress, loggerFactory)
+ {
+ _serviceProvider = serviceProvider;
+ _membershipService = membershipService;
+ _logger = logger;
+ var partitions = ImmutableArray.CreateBuilder(DirectoryMembershipSnapshot.PartitionsPerSilo); // Pre-sized: exactly PartitionsPerSilo partitions are added below.
+ for (var i = 0; i < DirectoryMembershipSnapshot.PartitionsPerSilo; i++)
+ {
+ partitions.Add(new GrainDirectoryReplica(i, this, localSiloDetails, loggerFactory, serviceProvider, grainFactory)); // 'i' becomes the partition's index on this silo.
+ }
+
+ _partitions = partitions.ToImmutable();
+ }
+
+ public async Task Lookup(GrainId grainId) => await InvokeAsync( // Asks the partition which owns 'grainId' for its current registration.
+ grainId,
+ static (replica, version, grainId, cancellationToken) => replica.LookupAsync(version, grainId),
+ grainId,
+ CancellationToken.None);
+
+ public async Task Register(GrainAddress address) => await InvokeAsync( // Registers 'address' with the owning partition, passing no previous registration.
+ address.GrainId,
+ static (replica, version, address, cancellationToken) => replica.RegisterAsync(version, address, null),
+ address,
+ CancellationToken.None);
+
+ public async Task Unregister(GrainAddress address) => await InvokeAsync( // Asks the owning partition to remove the registration for 'address'.
+ address.GrainId,
+ static (replica, version, address, cancellationToken) => replica.DeregisterAsync(version, address),
+ address,
+ CancellationToken.None);
+
+ public async Task Register(GrainAddress address, GrainAddress? previousAddress) => await InvokeAsync( // As Register(address), but also forwards the registration the caller expects to replace.
+ address.GrainId,
+ static (replica, version, state, cancellationToken) => replica.RegisterAsync(version, state.Address, state.PreviousAddress),
+ (Address: address, PreviousAddress: previousAddress), // Both values travel in a tuple so the lambda can stay static (no captures).
+ CancellationToken.None);
+
+ public Task UnregisterSilos(List siloAddresses) => Task.CompletedTask; // No-op: completes immediately without using 'siloAddresses'.
+
+ // Routes a directory operation to the partition which currently owns 'grainId' and returns its result.
+ // The call is re-issued until the owner answers at the same membership version as the local view; message rejections are retried up to MaxAttempts times.
+ // 'func' receives the owning partition, the view version, 'state' and the token; 'operation' defaults to the caller's member name and is only used for logging.
+ // Returns default when the view has no members. Throws OperationCanceledException once 'cancellationToken' is canceled.
+ private async Task<TResult> InvokeAsync<TState, TResult>(
+ GrainId grainId,
+ Func<IGrainDirectoryPartition, MembershipVersion, TState, CancellationToken, ValueTask<DirectoryResult<TResult>>> func,
+ TState state,
+ CancellationToken cancellationToken,
+ [CallerMemberName] string operation = "")
+ {
+ DirectoryResult<TResult> invokeResult;
+ var view = _membershipService.CurrentView;
+ var attempts = 0;
+ const int MaxAttempts = 10;
+ var delay = TimeSpan.FromMilliseconds(10);
+ while (true)
+ {
+ cancellationToken.ThrowIfCancellationRequested();
+ // Capture the recovery version up-front so that a change made while the call is in flight can be detected afterwards.
+ var initialRecoveryMembershipVersion = _recoveryMembershipVersion;
+ if (view.Version.Value < initialRecoveryMembershipVersion || !view.TryGetOwner(grainId, out var owner, out var partitionReference))
+ {
+ // If there are no members, bail out with the default return value.
+ if (view.Members.Length == 0 && view.Version.Value > 0)
+ {
+ return default!;
+ }
+
+ var targetVersion = Math.Max(view.Version.Value + 1, initialRecoveryMembershipVersion);
+ view = await _membershipService.RefreshViewAsync(new(targetVersion), cancellationToken);
+ continue;
+ }
+
+ try
+ {
+ RequestContext.Set("gid", partitionReference.GetGrainId());
+ invokeResult = await func(partitionReference, view.Version, state, cancellationToken);
+ }
+ catch (OrleansMessageRejectionException) when (attempts < MaxAttempts && !cancellationToken.IsCancellationRequested)
+ {
+ // This likely indicates that the target silo has been declared dead.
+ // Back off (starting at 10ms, growing 1.5x per attempt) and retry; the delay observes cancellation so a canceled caller is not held up.
+ ++attempts;
+ await Task.Delay(delay, cancellationToken);
+ delay *= 1.5;
+ continue;
+ }
+
+ if (initialRecoveryMembershipVersion != _recoveryMembershipVersion)
+ {
+ // The recovery version changed while the call was in flight, so re-issue the operation. The check at the top of the loop
+ // refreshes the view first if it is older than the new recovery version.
+ // See the comment on the declaration of '_recoveryMembershipVersion' for more details.
+ continue;
+ }
+
+ if (!invokeResult.TryGetResult(view.Version, out var result))
+ {
+ // The remote replica has a newer view of membership and is no longer the owner of the grain specified in the request.
+ // Refresh membership and re-evaluate.
+ view = await _membershipService.RefreshViewAsync(invokeResult.Version, cancellationToken);
+ continue;
+ }
+
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("Invoked '{Operation}' on '{Owner}' for grain '{GrainId}' and received result '{Result}'.", operation, owner, grainId, result);
+ }
+
+ return result;
+ }
+ }
+
+ public async ValueTask>> GetRegisteredActivations(MembershipVersion membershipVersion, RingRange range, bool isValidation) // Returns the addresses of local activations in 'range' which use this directory, deactivating those whose registration is in doubt.
+ {
+ if (!isValidation && _logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Collecting registered activations for range {Range} at version {MembershipVersion}.", range, membershipVersion);
+ }
+
+ var recoveryMembershipVersion = _recoveryMembershipVersion;
+ if (recoveryMembershipVersion < membershipVersion.Value) // Only ever raise the recovery version, never lower it.
+ {
+ // Ensure that the value is immediately visible to any thread registering an activation.
+ Interlocked.CompareExchange(ref _recoveryMembershipVersion, membershipVersion.Value, recoveryMembershipVersion);
+ }
+
+ var localActivations = _serviceProvider.GetRequiredService();
+ var grainDirectoryResolver = _serviceProvider.GetRequiredService();
+ List result = [];
+ List deactivationTasks = [];
+ var stopwatch = CoarseStopwatch.StartNew();
+ using var cts = new CancellationTokenSource();
+ cts.Cancel(); // Pre-canceled on purpose: the token is handed to Deactivate below.
+ foreach (var (grainId, activation) in localActivations)
+ {
+ var directory = GetGrainDirectory(activation, grainDirectoryResolver);
+ if (directory is not null && directory == this) // Ignore activations which are registered in some other directory (or in none).
+ {
+ var address = activation.Address;
+ if (!range.Contains(address.GrainId))
+ {
+ continue;
+ }
+
+ if (address.MembershipVersion == MembershipVersion.MinValue
+ || activation is ActivationData activationData && !activationData.IsValid)
+ {
+ // NOTE(review): validation requests were apparently meant to skip in-doubt activations instead of deactivating them,
+ // but that skip ('if (isValidation) continue;') is disabled, so they are deactivated for validation requests as well.
+
+ try
+ {
+ // This activation has not completed registration or is not currently active.
+ // Abort the activation with a pre-canceled cancellation token so that it skips directory deregistration.
+ // TODO: Expand validity check to non-ActivationData activations.
+ //logger.LogWarning("Deactivating activation '{Activation}' due to failure of a directory range owner.", activation);
+ activation.Deactivate(new DeactivationReason(DeactivationReasonCode.DirectoryFailure, "This activation's directory partition was salvaged while registration status was in-doubt."), cts.Token);
+ deactivationTasks.Add(activation.Deactivated);
+ }
+ catch (Exception exception)
+ {
+ _logger.LogWarning(exception, "Failed to deactivate activation {Activation}", activation); // Best effort: keep scanning the remaining activations.
+ }
+ }
+ else
+ {
+ if (!isValidation)
+ {
+ _logger.LogTrace("Sending activation '{Activation}' for recovery because its in the requested range {Range} (version {Version}).", activation.GrainId, range, membershipVersion);
+ }
+
+ result.Add(activation.Address);
+ }
+ }
+ }
+
+ await Task.WhenAll(deactivationTasks); // Do not answer until every in-doubt activation has finished deactivating.
+
+ if (!isValidation && _logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug(
+ "Submitting {Count} registered activations for range {Range} at version {MembershipVersion}. Deactivated {DeactivationCount} in-doubt registrations. Took {ElapsedMilliseconds}ms",
+ result.Count,
+ range,
+ membershipVersion,
+ deactivationTasks.Count,
+ stopwatch.ElapsedMilliseconds);
+ }
+
+ return result.AsImmutable();
+
+ static IGrainDirectory? GetGrainDirectory(IGrainContext grainContext, GrainDirectoryResolver grainDirectoryResolver) // Finds the directory a grain context is registered in, or null if it does not use one.
+ {
+ if (grainContext is ActivationData activationData)
+ {
+ return activationData.Shared.GrainDirectory;
+ }
+ else if (grainContext is SystemTarget systemTarget)
+ {
+ return null; // System targets are never reported for recovery.
+ }
+ else if (grainContext.GetComponent() is { IsUsingGrainDirectory: true })
+ {
+ return grainDirectoryResolver.Resolve(grainContext.GrainId.Type);
+ }
+
+ return null;
+ }
+ }
+
+ internal ValueTask RefreshViewAsync(MembershipVersion version, CancellationToken cancellationToken) => _membershipService.RefreshViewAsync(version, cancellationToken); // Pass-through to the membership service, exposed for the partitions.
+
+ void ILifecycleParticipant.Participate(ISiloLifecycle observer) // Hooks the directory into the silo lifecycle: start/stop at RuntimeInitialize, drain just before BecomeActive is torn down.
+ {
+ observer.Subscribe(nameof(DistributedGrainDirectory), ServiceLifecycleStage.RuntimeInitialize, OnRuntimeInitializeStart, OnRuntimeInitializeStop);
+
+ // Transition into 'ShuttingDown'/'Stopping' stage, removing ourselves from directory membership, but allow some time for hand-off before transitioning to 'Dead'.
+ observer.Subscribe(nameof(DistributedGrainDirectory), ServiceLifecycleStage.BecomeActive - 1, _ => Task.CompletedTask, OnShuttingDown);
+
+ Task OnRuntimeInitializeStart(CancellationToken cancellationToken) // Registers this target and its partitions with the catalog, then starts the membership loop.
+ {
+ var catalog = _serviceProvider.GetRequiredService();
+ catalog.RegisterSystemTarget(this);
+ foreach (var partition in _partitions)
+ {
+ catalog.RegisterSystemTarget(partition);
+ }
+
+ using var _ = new ExecutionContextSuppressor();
+ WorkItemGroup.QueueAction(() => _runTask = ProcessMembershipUpdates()); // Queued on this target's work item group rather than run inline.
+
+ return Task.CompletedTask;
+ }
+
+ async Task OnRuntimeInitializeStop(CancellationToken cancellationToken) // Signals shutdown and waits (bounded by 'cancellationToken') for the membership loop to exit.
+ {
+ _stoppedCts.Cancel();
+ if (_runTask is { } task)
+ {
+ // Try to wait for hand-off to complete.
+ await this.RunOrQueueTask(async () => await task.WaitAsync(cancellationToken).SuppressThrowing());
+ }
+ }
+
+ async Task OnShuttingDown(CancellationToken token) // Lets every partition drain concurrently; failures are suppressed.
+ {
+ var tasks = new List(_partitions.Length);
+ foreach (var partition in _partitions)
+ {
+ tasks.Add(partition.OnShuttingDown(token));
+ }
+ await Task.WhenAll(tasks).SuppressThrowing();
+ }
+ }
+
+ private async Task ProcessMembershipUpdates() // Long-running loop which forwards each membership view update to every local partition until shutdown.
+ {
+ // Ensure all child tasks are completed before exiting, tracking them here.
+ List tasks = [];
+ var previousUpdate = ClusterMembershipSnapshot.Default;
+ while (!_stoppedCts.IsCancellationRequested) // The outer loop re-subscribes to ViewUpdates if enumeration fails.
+ {
+ try
+ {
+ await foreach (var update in _membershipService.ViewUpdates.WithCancellation(_stoppedCts.Token))
+ {
+ tasks.RemoveAll(t => t.IsCompleted); // Keep the tracking list from growing without bound.
+ var changes = update.ClusterMembershipSnapshot.CreateUpdate(previousUpdate);
+
+ foreach (var change in changes.Changes)
+ {
+ if (change.Status == SiloStatus.Dead)
+ {
+ foreach (var partition in _partitions)
+ {
+ tasks.Add(partition.OnSiloRemovedFromClusterAsync(change));
+ }
+ }
+ }
+
+ var current = update;
+
+ foreach (var partition in _partitions)
+ {
+ tasks.Add(partition.ProcessMembershipUpdateAsync(current)); // Queued after the dead-silo notifications above.
+ }
+
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Updated view from '{PreviousVersion}' to '{Version}'.", previousUpdate.Version, update.Version);
+ }
+
+ previousUpdate = update.ClusterMembershipSnapshot;
+ }
+ }
+ catch (Exception exception)
+ {
+ if (!_stoppedCts.IsCancellationRequested) // Exceptions raised because of shutdown are expected and not logged.
+ {
+ _logger.LogError(exception, "Error processing membership updates.");
+ }
+ }
+ }
+
+ await Task.WhenAll(tasks).SuppressThrowing();
+ }
+
+ SiloAddress? ITestHooks.GetPrimaryForGrain(GrainId grainId) // Returns the silo which owns 'grainId' in the current view; the TryGetOwner result itself is ignored.
+ {
+ _membershipService.CurrentView.TryGetOwner(grainId, out var owner, out _);
+ return owner;
+ }
+
+ async Task ITestHooks.GetLocalRecord(GrainId grainId) // Looks up 'grainId' only if this silo is its owner; returns null otherwise.
+ {
+ var view = _membershipService.CurrentView;
+ if (view.TryGetOwner(grainId, out var owner, out var partitionReference) && Silo.Equals(owner))
+ {
+ var result = await partitionReference.LookupAsync(view.Version, grainId);
+ if (result.TryGetResult(view.Version, out var address)) // Fails when the partition answered with a different membership version.
+ {
+ return address;
+ }
+ }
+
+ return null;
+ }
+
+ internal interface ITestHooks // Inspection hooks; both members are implemented explicitly by DistributedGrainDirectory.
+ {
+ SiloAddress? GetPrimaryForGrain(GrainId grainId); // The owning silo for 'grainId' according to the current view.
+ Task GetLocalRecord(GrainId grainId); // The local record for 'grainId', when this silo is the owner.
+ }
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs
index 75a5d249ac..4fb70750f7 100644
--- a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs
+++ b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryHandoffManager.cs
@@ -21,7 +21,7 @@ internal sealed class GrainDirectoryHandoffManager
private readonly ISiloStatusOracle siloStatusOracle;
private readonly IInternalGrainFactory grainFactory;
private readonly ILogger logger;
- private readonly Factory createPartion;
+ private readonly Factory createPartion;
private readonly Queue<(string name, object state, Func action)> pendingOperations = new();
private readonly AsyncLock executorLock = new AsyncLock();
@@ -29,7 +29,7 @@ internal GrainDirectoryHandoffManager(
LocalGrainDirectory localDirectory,
ISiloStatusOracle siloStatusOracle,
IInternalGrainFactory grainFactory,
- Factory createPartion,
+ Factory createPartion,
ILoggerFactory loggerFactory)
{
logger = loggerFactory.CreateLogger();
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartitionSnapshot.cs b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartitionSnapshot.cs
new file mode 100644
index 0000000000..05445df2ab
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartitionSnapshot.cs
@@ -0,0 +1,16 @@
+using System.Collections.Generic;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+[GenerateSerializer, Alias(nameof(GrainDirectoryPartitionSnapshot)), Immutable]
+internal sealed class GrainDirectoryPartitionSnapshot( // Serializable snapshot of directory entries tagged with a directory membership version.
+ MembershipVersion directoryMembershipVersion,
+ List<GrainAddress> grainAddresses)
+{
+ [Id(0)]
+ public MembershipVersion DirectoryMembershipVersion { get; } = directoryMembershipVersion; // The membership version associated with the snapshot.
+
+ [Id(1)]
+ public List<GrainAddress> GrainAddresses { get; } = grainAddresses; // The grain registrations contained in the snapshot.
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.Interface.cs b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.Interface.cs
new file mode 100644
index 0000000000..cb40a7cf83
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.Interface.cs
@@ -0,0 +1,112 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.InteropServices;
+using System.Threading.Tasks;
+using Microsoft.Extensions.Logging;
+
+#nullable enable
+
+namespace Orleans.Runtime.GrainDirectory;
+
+internal sealed partial class GrainDirectoryReplica
+{
+ async ValueTask> IGrainDirectoryPartition.RegisterAsync(MembershipVersion version, GrainAddress address, GrainAddress? currentRegistration) // Registers 'address' if this partition owns the grain; otherwise tells the caller to refresh its view.
+ {
+ ArgumentNullException.ThrowIfNull(address);
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("RegisterAsync('{Version}', '{Address}', '{ExistingAddress}')", version, address, currentRegistration);
+ }
+
+ // Ensure that the current membership version is new enough.
+ await WaitForRange(address.GrainId, version);
+ if (!IsOwner(CurrentView, address.GrainId))
+ {
+ return DirectoryResult.RefreshRequired(CurrentView.Version); // Reply with this partition's view version rather than the caller's.
+ }
+
+ DebugAssertOwnership(address.GrainId);
+ return DirectoryResult.FromResult(RegisterCore(address, currentRegistration), version); // Tagged with the caller's version so its TryGetResult check succeeds.
+ }
+
+ async ValueTask> IGrainDirectoryPartition.LookupAsync(MembershipVersion version, GrainId grainId) // Returns the registration for 'grainId' if this partition owns it; otherwise tells the caller to refresh its view.
+ {
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("LookupAsync('{Version}', '{GrainId}')", version, grainId);
+ }
+
+ // Ensure we can serve the request.
+ await WaitForRange(grainId, version);
+ if (!IsOwner(CurrentView, grainId))
+ {
+ return DirectoryResult.RefreshRequired(CurrentView.Version); // Reply with this partition's view version rather than the caller's.
+ }
+
+ return DirectoryResult.FromResult(LookupCore(grainId), version); // Tagged with the caller's version so its TryGetResult check succeeds.
+ }
+
+ async ValueTask> IGrainDirectoryPartition.DeregisterAsync(MembershipVersion version, GrainAddress address) // Removes the registration for 'address' if this partition owns the grain; otherwise tells the caller to refresh its view.
+ {
+ ArgumentNullException.ThrowIfNull(address);
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("DeregisterAsync('{Version}', '{Address}')", version, address);
+ }
+
+ await WaitForRange(address.GrainId, version); // Wait until the view is new enough and the grain's range is not locked.
+ if (!IsOwner(CurrentView, address.GrainId))
+ {
+ return DirectoryResult.RefreshRequired(CurrentView.Version); // Reply with this partition's view version rather than the caller's.
+ }
+
+ DebugAssertOwnership(address.GrainId);
+ return DirectoryResult.FromResult(DeregisterCore(address), version); // Tagged with the caller's version so its TryGetResult check succeeds.
+ }
+
+ private bool DeregisterCore(GrainAddress address) // Removes the grain's entry when it matches 'address' or sits on a dead silo; returns whether an entry was removed.
+ {
+ var grainId = address.GrainId;
+ if (!_directory.TryGetValue(grainId, out var registered))
+ {
+ return false; // Nothing is registered for this grain.
+ }
+ return (registered.Matches(address) || IsSiloDead(registered)) && _directory.Remove(grainId);
+ }
+
+ internal GrainAddress? LookupCore(GrainId grainId) // Returns the registered address for the grain, or null when there is none or it points at a dead silo.
+ {
+ if (!_directory.TryGetValue(grainId, out var registered))
+ {
+ return null;
+ }
+
+ return IsSiloDead(registered) ? null : registered;
+ }
+
+ private GrainAddress RegisterCore(GrainAddress newAddress, GrainAddress? existingAddress) // Stores 'newAddress' unless a different, live registration already exists; returns the registration which ends up in the directory.
+ {
+ ref var existing = ref CollectionsMarshal.GetValueRefOrAddDefault(_directory, newAddress.GrainId, out _); // A reference to the dictionary slot: assigning to 'existing' below updates the directory in place.
+
+ if (existing is null || existing.Matches(existingAddress) || IsSiloDead(existing)) // Overwrite when the slot is empty, holds the registration the caller expected, or points at a dead silo.
+ {
+ if (newAddress.MembershipVersion != CurrentView.Version)
+ {
+ // Set the membership version to match the view number in which it was registered.
+ newAddress = new()
+ {
+ GrainId = newAddress.GrainId,
+ SiloAddress = newAddress.SiloAddress,
+ ActivationId = newAddress.ActivationId,
+ MembershipVersion = CurrentView.Version
+ };
+ }
+
+ existing = newAddress;
+ }
+
+ return existing;
+ }
+
+ private bool IsSiloDead(GrainAddress existing) => _owner.ClusterMembershipSnapshot.GetSiloStatus(existing.SiloAddress) is SiloStatus.Dead; // True when the owner's cluster membership snapshot reports the entry's silo as Dead.
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.cs b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.cs
new file mode 100644
index 0000000000..00a28cba4d
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryReplica.cs
@@ -0,0 +1,771 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
+using System.Linq;
+using System.Threading;
+using System.Threading.Tasks;
+using Microsoft.CodeAnalysis;
+using Microsoft.Extensions.Logging;
+using Orleans.Concurrency;
+using Orleans.Internal;
+using Orleans.Runtime.Scheduler;
+using Orleans.Runtime.Utilities;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+/// <summary>
+/// Represents a single contiguous partition of the distributed grain directory.
+/// </summary>
+/// <param name="partitionIndex">The index of this partition on this silo. Each silo hosts a fixed number of dynamically sized partitions.</param>
+internal sealed partial class GrainDirectoryReplica(
+ int partitionIndex,
+ DistributedGrainDirectory owner,
+ ILocalSiloDetails localSiloDetails,
+ ILoggerFactory loggerFactory,
+ IServiceProvider serviceProvider,
+ IInternalGrainFactory grainFactory)
+ : SystemTarget(CreateGrainId(localSiloDetails.SiloAddress, partitionIndex), localSiloDetails.SiloAddress, loggerFactory), IGrainDirectoryPartition, IGrainDirectoryTestHooks
+{
+ internal static SystemTargetGrainId CreateGrainId(SiloAddress siloAddress, int partitionIndex) => SystemTargetGrainId.Create(Constants.GrainDirectoryPartition, siloAddress, partitionIndex.ToString(CultureInfo.InvariantCulture)); // Builds the system target id for partition 'partitionIndex' on 'siloAddress'.
+ private readonly Dictionary _directory = []; // The registrations held by this partition, keyed by grain id.
+ private readonly int _partitionIndex = partitionIndex;
+ private readonly DistributedGrainDirectory _owner = owner;
+ private readonly IServiceProvider _serviceProvider = serviceProvider;
+ private readonly IInternalGrainFactory _grainFactory = grainFactory;
+ private readonly CancellationTokenSource _drainSnapshotsCts = new(); // Canceled by OnShuttingDown to mark that snapshots are being drained.
+ private readonly SiloAddress _id = localSiloDetails.SiloAddress; // The address of the local silo.
+ private readonly ILogger _logger = loggerFactory.CreateLogger();
+ private readonly TaskCompletionSource _snapshotsDrainedTcs = new(TaskCreationOptions.RunContinuationsAsynchronously); // Completed once draining was requested and no snapshots remain.
+ private readonly AsyncEnumerable _viewUpdates = new(
+ DirectoryMembershipSnapshot.Default,
+ (previous, proposed) => proposed.Version >= previous.Version,
+ _ => { });
+
+ // Ranges which cannot be served currently, eg because the replica is currently transferring them from a previous owner.
+ // Requests in these ranges must wait for the range to become available.
+ private readonly List<(RingRange Range, MembershipVersion Version, TaskCompletionSource Completion)> _rangeLocks = [];
+
+ // Ranges which were previously at least partially owned by this replica, but which are pending transfer to a new replica.
+ private readonly List _partitionSnapshots = [];
+
+ // Tracked for diagnostic purposes only.
+ private readonly List _viewChangeTasks = [];
+ private CancellationToken ShutdownToken => _owner.OnStoppedToken;
+
+ private RingRange _currentRange; // The range owned under CurrentView; updated by ProcessMembershipUpdate.
+
+ // The current directory membership snapshot.
+ public DirectoryMembershipSnapshot CurrentView { get; private set; } = DirectoryMembershipSnapshot.Default;
+
+ public async ValueTask RefreshViewAsync(MembershipVersion version, CancellationToken cancellationToken) // Waits until this partition has observed a view at or beyond 'version', then returns CurrentView.
+ {
+ _ = _owner.RefreshViewAsync(version, cancellationToken); // Not awaited: the refreshed view is observed through _viewUpdates below.
+ if (CurrentView.Version <= version) // NOTE(review): also entered when the versions are already equal.
+ {
+ await foreach (var view in _viewUpdates.WithCancellation(cancellationToken))
+ {
+ if (view.Version >= version)
+ {
+ break;
+ }
+ }
+ }
+
+ return CurrentView;
+ }
+
+ async ValueTask IGrainDirectoryPartition.GetSnapshotAsync(MembershipVersion version, MembershipVersion rangeVersion, RingRange range) // Returns the entries within 'range' from the retained snapshot taken at 'rangeVersion', or null when no such snapshot is held.
+ {
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("GetSnapshotAsync('{Version}', '{RangeVersion}', '{Range}')", version, rangeVersion, range);
+ }
+
+ // Wait for the range to be unlocked.
+ await WaitForRange(range, version);
+
+ ShutdownToken.ThrowIfCancellationRequested();
+ List partitionAddresses = [];
+ foreach (var partitionSnapshot in _partitionSnapshots)
+ {
+ if (partitionSnapshot.DirectoryMembershipVersion != rangeVersion)
+ {
+ continue;
+ }
+
+ // Only include addresses which are in the requested range.
+ foreach (var address in partitionSnapshot.GrainAddresses)
+ {
+ if (range.Contains(address.GrainId))
+ {
+ partitionAddresses.Add(address);
+ }
+ }
+
+ var rangeSnapshot = new GrainDirectoryPartitionSnapshot(rangeVersion, partitionAddresses);
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Transferring '{Count}' entries in range '{Range}' from version '{Version}' snapshot.", partitionAddresses.Count, range, rangeVersion);
+ }
+
+ return rangeSnapshot; // Only the first snapshot matching 'rangeVersion' is used.
+ }
+
+ _logger.LogWarning("Received a request for a snapshot which this replica does not have, version '{Version}', range version '{RangeVersion}', range '{Range}'.", version, rangeVersion, range);
+ return null;
+ }
+
+ ValueTask IGrainDirectoryPartition.AcknowledgeSnapshotTransferAsync(SiloAddress silo, int partitionIndex, MembershipVersion rangeVersion) // Stops tracking the given partition as a transfer partner for the 'rangeVersion' snapshot.
+ {
+ RemoveSnapshotTransferPartner((silo, partitionIndex), rangeVersion);
+ return new(true); // Always completes synchronously with 'true'.
+ }
+
+ private void RemoveSnapshotTransferPartner((SiloAddress Silo, int PartitionIndex) owner, MembershipVersion? rangeVersion) // Drops 'owner' as a transfer partner (of all snapshots, or only those at 'rangeVersion') and discards snapshots left without partners.
+ {
+ var index = 0;
+ while (index < _partitionSnapshots.Count)
+ {
+ var snapshot = _partitionSnapshots[index];
+ if (rangeVersion is { } requiredVersion && snapshot.DirectoryMembershipVersion != requiredVersion)
+ {
+ ++index; // A specific version was requested and this snapshot is not at that version.
+ continue;
+ }
+ var remaining = snapshot.TransferPartners;
+ remaining.RemoveWhere(p => p.SiloAddress.Equals(owner.Silo) && (owner.PartitionIndex < 0 || p.PartitionIndex == owner.PartitionIndex));
+ if (remaining.Count > 0)
+ {
+ ++index; // Other partners still need this snapshot, so keep it.
+ continue;
+ }
+ _partitionSnapshots.RemoveAt(index); // 'index' is left unchanged so the element which shifts into this slot is visited next.
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Removing version '{Version}' snapshot. Current snapshots: [{CurrentSnapshots}].", snapshot.DirectoryMembershipVersion, string.Join(", ", _partitionSnapshots.Select(s => s.DirectoryMembershipVersion)));
+ }
+ // Once draining has been requested, signal completion as soon as the last pending snapshot is gone.
+ if (_drainSnapshotsCts.IsCancellationRequested && _partitionSnapshots.Count == 0)
+ {
+ _snapshotsDrainedTcs.TrySetResult();
+ }
+ }
+ }
+
+ [Conditional("DEBUG")]
+ private void DebugAssertOwnership(GrainId grainId) => DebugAssertOwnership(CurrentView, grainId); // Debug-only check against the current view.
+
+ [Conditional("DEBUG")]
+ private void DebugAssertOwnership(DirectoryMembershipSnapshot view, GrainId grainId)
+ {
+ // Debug-only check that, according to 'view', this silo and this exact partition own 'grainId'.
+ // The checks are chained so that only the first failure is reported and 'partitionReference' is never used after a failed lookup.
+ if (!view.TryGetOwner(grainId, out var owner, out var partitionReference))
+ {
+ Debug.Fail($"Could not find owner for grain '{grainId}' in view '{view}'.");
+ }
+ else if (!_id.Equals(owner))
+ {
+ Debug.Fail($"'{_id}' expected to be the owner of grain '{grainId}', but the owner is '{owner}'.");
+ }
+ else if (!GrainId.Equals(partitionReference.GetGrainId()))
+ {
+ Debug.Fail($"'{GrainId}' expected to be the owner of grain '{grainId}', but the owner is '{partitionReference.GetGrainId()}'.");
+ }
+ }
+
+ private bool IsOwner(DirectoryMembershipSnapshot view, GrainId grainId) => view.TryGetOwner(grainId, out _, out var partitionReference) && GrainId.Equals(partitionReference.GetGrainId()); // True when 'view' maps 'grainId' to this exact partition (compared by system target grain id).
+
+ private ValueTask WaitForRange(GrainId grainId, MembershipVersion version) => WaitForRange(RingRange.FromPoint(grainId.GetUniformHashCode()), version); // Single-grain overload: waits on the point range at the grain's uniform hash code.
+
+ private ValueTask WaitForRange(RingRange range, MembershipVersion version) // Completes once CurrentView is at least 'version' and no lock at or below 'version' intersects 'range'.
+ {
+ GrainRuntime.CheckRuntimeContext(this);
+ Task? completion = null;
+ if (CurrentView.Version < version || TryGetIntersectingLock(range, version, out completion))
+ {
+ return WaitForRangeCore(range, version, completion); // Slow path: 'completion' stays null when only the view is behind.
+ }
+
+ return ValueTask.CompletedTask; // Fast path: nothing to wait for, so no async state machine is created.
+
+ bool TryGetIntersectingLock(RingRange range, MembershipVersion version, [NotNullWhen(true)] out Task? completion) // Finds the first lock at or below 'version' which overlaps 'range'.
+ {
+ foreach (var rangeLock in _rangeLocks)
+ {
+ if (rangeLock.Version <= version && range.Intersects(rangeLock.Range))
+ {
+ completion = rangeLock.Completion.Task;
+ return true;
+ }
+ }
+
+ completion = null;
+ return false;
+ }
+
+ async ValueTask WaitForRangeCore(RingRange range, MembershipVersion version, Task? task)
+ {
+ if (task is not null)
+ {
+ await task; // NOTE(review): unlike the waits below, this one does not observe ShutdownToken.
+ }
+
+ if (CurrentView.Version < version)
+ {
+ await RefreshViewAsync(version, ShutdownToken);
+ }
+
+ while (TryGetIntersectingLock(range, version, out var completion)) // Re-check: further locks may intersect the range after the waits above.
+ {
+ await completion.WaitAsync(ShutdownToken);
+ }
+ }
+ }
+
+ public IGrainDirectoryPartition GetReplicaReference(SiloAddress address, int partitionIndex) => _grainFactory.GetSystemTarget(CreateGrainId(address, partitionIndex).GrainId); // Gets a reference to partition 'partitionIndex' hosted on silo 'address'.
+
+ internal async Task OnShuttingDown(CancellationToken token) // Marks snapshots as draining, then waits (until 'token' fires) for the retained snapshots to be removed.
+ {
+ await this.RunOrQueueTask(async () =>
+ {
+ _drainSnapshotsCts.Cancel(); // From now on, removing the last snapshot completes _snapshotsDrainedTcs.
+ if (_partitionSnapshots.Count > 0)
+ {
+ await _snapshotsDrainedTcs.Task.WaitAsync(token).SuppressThrowing();
+ }
+ });
+ }
+ internal Task OnSiloRemovedFromClusterAsync(ClusterMember change) => // Queues OnSiloRemovedFromCluster onto this system target.
+ this.QueueAction(
+ static state => state.Self.OnSiloRemovedFromCluster(state.Change),
+ (Self: this, Change: change), // State is passed explicitly so the lambda can stay static.
+ nameof(OnSiloRemovedFromCluster));
+
+ private void OnSiloRemovedFromCluster(ClusterMember change) // Removes entries registered on the removed silo and stops tracking it as a snapshot transfer partner.
+ {
+ GrainRuntime.CheckRuntimeContext(this);
+ var toRemove = new List();
+ foreach (var entry in _directory) // Collect first: the dictionary cannot be modified while it is being enumerated.
+ {
+ if (change.SiloAddress.Equals(entry.Value.SiloAddress))
+ {
+ toRemove.Add(entry.Value);
+ }
+ }
+
+ if (toRemove.Count > 0)
+ {
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Deleting '{Count}' entries located on now-defunct silo '{SiloAddress}'.", toRemove.Count, change.SiloAddress);
+ }
+
+ foreach (var grainAddress in toRemove)
+ {
+#if false
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Deleting '{GrainAddress}' located on now-defunct silo '{SiloAddress}'.", grainAddress, change.SiloAddress);
+ }
+#endif
+ DeregisterCore(grainAddress);
+ }
+ }
+
+ RemoveSnapshotTransferPartner((change.SiloAddress, -1), rangeVersion: null); // -1 matches every partition of the silo; a null version matches every snapshot.
+ }
+
+ internal Task ProcessMembershipUpdateAsync(DirectoryMembershipSnapshot current) => // Queues ProcessMembershipUpdate onto this system target.
+ this.QueueAction(
+ static state => state.Self.ProcessMembershipUpdate(state.Current),
+ (Self: this, Current: current), // State is passed explicitly so the lambda can stay static.
+ nameof(ProcessMembershipUpdate));
+
+ private void ProcessMembershipUpdate(DirectoryMembershipSnapshot current) // Adopts 'current' as the view, starts releasing/acquiring the range which changed, and publishes the new view.
+ {
+ GrainRuntime.CheckRuntimeContext(this);
+
+ _viewChangeTasks.RemoveAll(task => task.IsCompleted);
+
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("Observed membership version '{Version}'.", current.Version);
+ }
+
+ var previous = CurrentView;
+ CurrentView = current;
+
+ var previousRange = previous.GetRange(_id, _partitionIndex);
+ _currentRange = current.GetRange(_id, _partitionIndex);
+
+ // It is important that this method is synchronous, to ensure that updates are atomic.
+ var deltaSize = _currentRange.SizePercent - previousRange.SizePercent;
+ var meanSizePercent = current.Members.Length > 0 ? 100.0 / current.Members.Length : 0f; // Computed per member, for the debug log below only.
+ var deviationFromMean = Math.Abs(meanSizePercent - _currentRange.SizePercent);
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Updating view from '{PreviousVersion}' to '{Version}'. Now responsible for '{Range}' (Δ {DeltaPercent:0.00}%. {DeviationFromMean:0.00}% from ideal share).", previous.Version, current.Version, _currentRange, deltaSize, deviationFromMean);
+ }
+
+ var removedRange = previousRange.Difference(_currentRange).SingleOrDefault(); // SingleOrDefault throws if the difference yields more than one range.
+ var addedRange = _currentRange.Difference(previousRange).SingleOrDefault();
+
+#if DEBUG
+ Debug.Assert(addedRange.IsEmpty ^ removedRange.IsEmpty || addedRange.IsEmpty && removedRange.IsEmpty); // Either the range grew or it shrank, but not both.
+ Debug.Assert(previousRange.Difference(_currentRange).Count() < 2);
+ Debug.Assert(_currentRange.Difference(previousRange).Count() < 2);
+ Debug.Assert(_currentRange.Size == previousRange.Size + addedRange.Size - removedRange.Size);
+ Debug.Assert(!removedRange.Intersects(addedRange));
+ Debug.Assert(!removedRange.Intersects(_currentRange));
+ Debug.Assert(removedRange.IsEmpty || removedRange.Intersects(previousRange));
+ Debug.Assert(!addedRange.Intersects(removedRange));
+ Debug.Assert(addedRange.IsEmpty || addedRange.Intersects(_currentRange));
+ Debug.Assert(!addedRange.Intersects(previousRange));
+ Debug.Assert(previousRange.IsEmpty || _currentRange.IsEmpty || previousRange.Start == _currentRange.Start);
+#endif
+
+ if (!removedRange.IsEmpty)
+ {
+ _viewChangeTasks.Add(ReleaseRangeAsync(previous, current, removedRange)); // Started here and tracked; not awaited.
+ }
+
+ if (!addedRange.IsEmpty)
+ {
+ _viewChangeTasks.Add(AcquireRangeAsync(previous, current, addedRange)); // Started here and tracked; not awaited.
+ }
+
+ _viewUpdates.Publish(current); // Wakes callers waiting in RefreshViewAsync.
+ }
+
+ private async Task ReleaseRangeAsync(DirectoryMembershipSnapshot previous, DirectoryMembershipSnapshot current, RingRange removedRange) // Moves the entries in 'removedRange' out of the directory and into a snapshot for the range's new owners.
+ {
+ GrainRuntime.CheckRuntimeContext(this);
+ var (tcs, sw) = LockRange(removedRange, current.Version); // Held until the 'finally' block below unlocks the range.
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Relinquishing ownership of range '{Range}'.", removedRange);
+ }
+
+ try
+ {
+ // Snapshot & remove everything not in the current range.
+ // The new owner will have the opportunity to retrieve the snapshot as they take ownership.
+ List removedAddresses = [];
+ HashSet<(SiloAddress, int)> transferPartners = [];
+
+ // Wait for the range being removed to become valid.
+ await WaitForRange(removedRange, previous.Version);
+
+ GrainRuntime.CheckRuntimeContext(this);
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("Relinquishing ownership of range '{Range}'.", removedRange);
+ }
+
+ foreach (var (range, ownerIndex, partitionIndex) in current.RangeOwners) // Every owner in the new view whose range overlaps the removed range becomes a transfer partner.
+ {
+ if (range.Intersects(removedRange))
+ {
+ var owner = current.Members[ownerIndex];
+ Debug.Assert(!_id.Equals(owner));
+ transferPartners.Add((owner, partitionIndex));
+ }
+ }
+
+ // Collect all addresses that are not in the owned range.
+ foreach (var entry in _directory)
+ {
+ if (removedRange.Contains(entry.Key))
+ {
+ removedAddresses.Add(entry.Value);
+ }
+ }
+
+ // Remove these addresses from the partition.
+ foreach (var address in removedAddresses)
+ {
+ if (transferPartners.Count > 0)
+ {
+ _logger.LogTrace("Evicting entry '{Address}' to snapshot.", address);
+ }
+
+ _directory.Remove(address.GrainId);
+ }
+
+ if (transferPartners.Count > 0)
+ {
+ _partitionSnapshots.Add(new PartitionSnapshotState(previous.Version, removedAddresses, transferPartners)); // Stored under the previous view's version.
+ }
+ else
+ {
+ _logger.LogDebug("Dropping snapshot since there are no transfer partners.");
+ }
+ }
+ finally
+ {
+ UnlockRange(removedRange, current.Version, tcs, sw.Elapsed, "release");
+ }
+ }
+
+ private async Task AcquireRangeAsync(DirectoryMembershipSnapshot previous, DirectoryMembershipSnapshot current, RingRange addedRange)
+ { // Takes ownership of 'addedRange': pulls snapshots from the previous owners when possible, otherwise rebuilds the range via RecoverPartitionRange.
+ GrainRuntime.CheckRuntimeContext(this);
+ // Suspend the range and transfer state from the previous owners.
+ // If the predecessor becomes unavailable or membership advances quickly, we will declare data loss and unlock the range.
+ var (tcs, sw) = LockRange(addedRange, current.Version);
+
+ try
+ {
+ CoarseStopwatch stopwatch = default; // Only started when debug logging is enabled; read only by the completion log message below.
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Acquiring range '{Range}'.", addedRange);
+ stopwatch = CoarseStopwatch.StartNew();
+ }
+
+ // The view change is contiguous if the new version is exactly one greater than the previous version.
+ // If not, we have missed some updates, so we must declare a potential data loss event.
+ var isContiguous = current.Version.Value == previous.Version.Value + 1;
+ bool success;
+ if (isContiguous)
+ {
+ // Transfer subranges from previous owners.
+ var tasks = new List>();
+ foreach (var previousOwner in previous.Members)
+ {
+ var previousOwnerRanges = previous.GetMemberRangesByPartition(previousOwner);
+ for (var partitionIndex = 0; partitionIndex < previousOwnerRanges.Length; partitionIndex++)
+ {
+ var previousOwnerRange = previousOwnerRanges[partitionIndex];
+ if (previousOwnerRange.Intersects(addedRange))
+ {
+ tasks.Add(TransferSnapshotAsync(current, addedRange, previousOwner, partitionIndex, previous.Version));
+ }
+ }
+ }
+
+ // Note: there should be no 'await' points before this point.
+ // An await before this point would result in ranges not being locked synchronously.
+ await Task.WhenAll(tasks).WaitAsync(ShutdownToken).SuppressThrowing();
+ if (ShutdownToken.IsCancellationRequested)
+ {
+ return; // Shutting down: skip recovery; the finally block cancels the lock instead of completing it.
+ }
+
+ success = tasks.All(t => t.Result); // TransferSnapshotAsync catches its own exceptions and reports failure by returning false.
+ }
+ else
+ {
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug(
+ "Non-contiguous view change detected: '{PreviousVersion}' to '{CurrentVersion}'. Performing recovery.",
+ previous.Version,
+ current.Version);
+ }
+
+ success = false;
+ }
+
+ var recovered = false;
+ if (!success)
+ {
+ // Wait for previous versions to be unlocked before proceeding.
+ await WaitForRange(addedRange, previous.Version);
+
+ await RecoverPartitionRange(current, addedRange);
+ recovered = true;
+ }
+
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Completed transferring entries for range '{Range}' at version '{Version}' took {Elapsed}ms.{Recovered}", addedRange, current.Version, stopwatch.ElapsedMilliseconds, recovered ? " Recovered" : "");
+ }
+ }
+ finally
+ {
+ UnlockRange(addedRange, current.Version, tcs, sw.Elapsed, "acquire");
+ }
+ }
+
+ private (TaskCompletionSource Lock, ValueStopwatch Stopwatch) LockRange(RingRange range, MembershipVersion version)
+ { // Registers a pending lock for the range at the given version and starts timing how long it is held.
+ TaskCompletionSource pending = new(TaskCreationOptions.RunContinuationsAsynchronously);
+ _rangeLocks.Add((range, version, pending));
+ return (Lock: pending, Stopwatch: ValueStopwatch.StartNew());
+ }
+
+ private void UnlockRange(RingRange range, MembershipVersion version, TaskCompletionSource tcs, TimeSpan heldDuration, string operationName)
+ {
+ // The hold time is recorded no matter how the lock ends.
+ DirectoryInstruments.RangeLockHeldDuration.Record((long)heldDuration.TotalMilliseconds);
+ if (!ShutdownToken.IsCancellationRequested)
+ {
+ tcs.SetResult(); // Complete the lock's task, then drop the lock from the tracked set.
+ _rangeLocks.Remove((range, version, tcs));
+ return;
+ }
+
+ // If the replica is stopped, the range is never unlocked and the task is cancelled instead.
+ tcs.SetCanceled(ShutdownToken);
+ }
+
+ private async Task TransferSnapshotAsync(DirectoryMembershipSnapshot current, RingRange addedRange, SiloAddress previousOwner, int partitionIndex, MembershipVersion previousVersion)
+ { // Pulls the snapshot for 'addedRange' from one previous owner partition and merges it into the local directory. Returns false instead of throwing on any failure.
+ try
+ {
+ var stopwatch = ValueStopwatch.StartNew();
+ if (_logger.IsEnabled(LogLevel.Trace))
+ {
+ _logger.LogTrace("Requesting entries for ranges '{Range}' from '{PreviousOwner}' at version '{PreviousVersion}'.", addedRange, previousOwner, previousVersion);
+ }
+
+ var replica = GetReplicaReference(previousOwner, partitionIndex);
+
+ // Alternatively, the previous owner could push the snapshot. The pull-based approach is used here because it is simpler.
+ var snapshot = await replica.GetSnapshotAsync(current.Version, previousVersion, addedRange).AsTask().WaitAsync(ShutdownToken);
+
+ if (snapshot is null)
+ {
+ _logger.LogWarning("Expected a valid snapshot from previous owner '{PreviousOwner}' for part of ranges '{Range}', but found none.", previousOwner, addedRange);
+ return false; // The caller (AcquireRangeAsync) falls back to recovery when any transfer reports failure.
+ }
+
+ // The acknowledgement step lets the previous owner know that the snapshot has been received so that it can proceed.
+ InvokeOnClusterMember( // Not awaited: the acknowledgement runs in the background and its outcome is ignored.
+ previousOwner,
+ async () => await replica.AcknowledgeSnapshotTransferAsync(_id, _partitionIndex, previousVersion),
+ false,
+ nameof(IGrainDirectoryPartition.AcknowledgeSnapshotTransferAsync)).Ignore();
+
+ // Wait for previous versions to be unlocked before proceeding.
+ await WaitForRange(addedRange, previousVersion);
+
+ // Incorporate the values into the grain directory.
+ foreach (var entry in snapshot.GrainAddresses)
+ {
+ DebugAssertOwnership(current, entry.GrainId);
+
+ _logger.LogTrace("Received '{Entry}' via snapshot from '{PreviousOwner}' for version '{Version}'.", entry, previousOwner, previousVersion);
+ _directory[entry.GrainId] = entry; // Overwrites any entry already present for this grain.
+ }
+
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Transferred '{Count}' entries for range '{Range}' from '{PreviousOwner}'.", snapshot.GrainAddresses.Count, addedRange, previousOwner);
+ }
+
+ DirectoryInstruments.SnapshotTransferCount.Add(1);
+ DirectoryInstruments.SnapshotTransferDuration.Record((long)stopwatch.Elapsed.TotalMilliseconds);
+
+ return true;
+ }
+ catch (Exception exception) // Includes cancellation raised by WaitAsync(ShutdownToken) above.
+ {
+ if (exception is SiloUnavailableException)
+ {
+ _logger.LogWarning("Remote host became unavailable while transferring ownership of range '{Range}'. Recovery will be performed.", addedRange);
+ }
+ else
+ {
+ _logger.LogWarning(exception, "Error transferring ownership of range '{Range}'. Recovery will be performed.", addedRange);
+ }
+
+ return false;
+ }
+ }
+
+ private async Task RecoverPartitionRange(DirectoryMembershipSnapshot current, RingRange addedRange)
+ { // Rebuilds the directory entries for 'addedRange' from the activations which cluster members report as registered.
+ var stopwatch = ValueStopwatch.StartNew();
+ GrainRuntime.CheckRuntimeContext(this);
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Recovering activations from range '{Range}' at version '{Version}'.", addedRange, current.Version);
+ }
+
+ await foreach (var activations in GetRegisteredActivations(current, addedRange, isValidation: false)) // One list per queried silo.
+ {
+ GrainRuntime.CheckRuntimeContext(this);
+ foreach (var entry in activations)
+ {
+ DebugAssertOwnership(current, entry.GrainId);
+ _logger.LogTrace("Recovered '{Entry}' for version '{Version}'.", entry, current.Version);
+ _directory[entry.GrainId] = entry; // Overwrites any entry already present for this grain.
+ }
+ }
+
+ DirectoryInstruments.RangeRecoveryCount.Add(1); // Counted even when the enumeration above yielded nothing.
+ DirectoryInstruments.RangeRecoveryDuration.Record((long)stopwatch.Elapsed.TotalMilliseconds);
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Completed recovering activations from range '{Range}' at version '{Version}' took '{Elapsed}'.", addedRange, current.Version, stopwatch.Elapsed);
+ }
+ }
+
+ private async IAsyncEnumerable> GetRegisteredActivations(DirectoryMembershipSnapshot current, RingRange range, bool isValidation)
+ { // Asks every Active, Joining or ShuttingDown silo for its registered activations within 'range' and yields one result list per silo.
+ // Membership is guaranteed to be at least as recent as the current view.
+ var clusterMembershipSnapshot = _owner.ClusterMembershipSnapshot;
+ Debug.Assert(clusterMembershipSnapshot.Version >= current.Version);
+
+ var tasks = new List>>();
+ foreach (var member in clusterMembershipSnapshot.Members.Values)
+ {
+ if (member.Status is not (SiloStatus.Active or SiloStatus.Joining or SiloStatus.ShuttingDown))
+ {
+ continue;
+ }
+
+ tasks.Add(GetRegisteredActivationsFromClusterMember(current.Version, range, member.SiloAddress, isValidation));
+ }
+
+ await Task.WhenAll(tasks).WaitAsync(ShutdownToken).SuppressThrowing(); // All silos are queried concurrently; failures are not thrown here.
+ if (ShutdownToken.IsCancellationRequested)
+ {
+ yield break; // Shutting down: yield nothing.
+ }
+
+ foreach (var task in tasks)
+ {
+ yield return await task; // Already completed; awaiting rethrows if that call faulted.
+ }
+
+ async Task> GetRegisteredActivationsFromClusterMember(MembershipVersion version, RingRange range, SiloAddress siloAddress, bool isValidation)
+ {
+ var stopwatch = ValueStopwatch.StartNew();
+ var client = _grainFactory.GetSystemTarget(Constants.GrainDirectory, siloAddress);
+ var result = await InvokeOnClusterMember(
+ siloAddress,
+ async () => await client.GetRegisteredActivations(version, range, isValidation),
+ new Immutable>([]), // Default when the silo is no longer a live member: an empty list.
+ nameof(GetRegisteredActivations));
+
+ if (_logger.IsEnabled(LogLevel.Debug))
+ {
+ _logger.LogDebug("Recovered '{Count}' entries from silo '{SiloAddress}' for ranges '{Range}' at version '{Version}' in {ElapsedMilliseconds}ms.", result.Value.Count, siloAddress, range, version, stopwatch.Elapsed.TotalMilliseconds);
+ }
+
+ return result.Value;
+ }
+ }
+
+ private async Task InvokeOnClusterMember(SiloAddress siloAddress, Func> func, T defaultValue, string operationName)
+ { // Retries 'func' until it succeeds, the target silo is no longer a live member (returns 'defaultValue'), or shutdown is requested (throws).
+ GrainRuntime.CheckRuntimeContext(this);
+ var clusterMembershipSnapshot = _owner.ClusterMembershipSnapshot;
+ while (!ShutdownToken.IsCancellationRequested)
+ {
+ if (clusterMembershipSnapshot.GetSiloStatus(siloAddress) is not (SiloStatus.Active or SiloStatus.Joining or SiloStatus.ShuttingDown))
+ {
+ break; // The silo is not Active, Joining or ShuttingDown: give up and return the default value.
+ }
+
+ try
+ {
+ return await func();
+ }
+ catch (Exception ex)
+ {
+ if (ex is not OrleansMessageRejectionException) // Message rejections are retried without being logged.
+ {
+ _logger.LogError(ex, "Error invoking operation '{Operation}' on silo '{SiloAddress}'.", operationName, siloAddress);
+ }
+
+ await _owner.RefreshViewAsync(default, CancellationToken.None);
+ if (_owner.ClusterMembershipSnapshot.Version == clusterMembershipSnapshot.Version)
+ {
+ await Task.Delay(TimeSpan.FromMilliseconds(100)); // Membership did not change: pause briefly before retrying.
+ }
+
+ clusterMembershipSnapshot = _owner.ClusterMembershipSnapshot; // Re-evaluate the silo's status against the latest membership.
+ }
+ }
+
+ ShutdownToken.ThrowIfCancellationRequested(); // Throws OperationCanceledException if the loop ended because of shutdown.
+ return defaultValue;
+ }
+
+ async ValueTask IGrainDirectoryTestHooks.CheckIntegrityAsync()
+ { // Test hook: checks that every local entry belongs to this partition's range and that every activation reported by cluster members is registered here.
+ GrainRuntime.CheckRuntimeContext(this);
+ var current = CurrentView;
+ var range = _currentRange;
+ Debug.Assert(range.Equals(current.GetRange(_id, _partitionIndex)));
+
+ await WaitForRange(RingRange.Full, current.Version);
+ var tcs = new TaskCompletionSource(TaskCreationOptions.RunContinuationsAsynchronously);
+ _rangeLocks.Add((RingRange.Full, current.Version, tcs)); // Lock the entire ring for the duration of the check.
+ try
+ {
+ foreach (var entry in _directory)
+ {
+ if (!range.Contains(entry.Key))
+ {
+ Debug.Fail($"Invariant violated. This host is not the owner of grain '{entry.Key}'.");
+ }
+
+ DebugAssertOwnership(current, entry.Key);
+ }
+
+ var missing = 0; // The tallies are never read by this method. NOTE(review): presumably kept for inspection in a debugger.
+ var mismatched = 0;
+ var total = 0;
+ await foreach (var activationList in GetRegisteredActivations(current, range, isValidation: true))
+ {
+ total += activationList.Count;
+ foreach (var entry in activationList)
+ {
+ if (!IsOwner(current, entry.GrainId))
+ {
+ // Tolerated if the view has been refreshed since the request was made; fail only if the live view has not advanced.
+ if (CurrentView.Version <= current.Version)
+ {
+ Debug.Fail("Invariant violated. This host was sent a registration which it should not have been.");
+ }
+
+ continue;
+ }
+
+ if (_directory.TryGetValue(entry.GrainId, out var existingEntry))
+ {
+ if (!existingEntry.Equals(entry))
+ {
+ ++mismatched;
+ _logger.LogError("Integrity violation: Recovered entry '{RecoveredRecord}' does not match existing entry '{LocalRecord}'.", entry, existingEntry);
+ Debug.Fail($"Integrity violation: Recovered entry '{entry}' does not match existing entry '{existingEntry}'.");
+ }
+ }
+ else
+ {
+ ++missing;
+ _logger.LogError("Integrity violation: Recovered entry '{RecoveredRecord}' not found in directory.", entry);
+ Debug.Fail($"Integrity violation: Recovered entry '{entry}' not found in directory.");
+ }
+ }
+ }
+ }
+ finally
+ {
+ if (ShutdownToken.IsCancellationRequested)
+ {
+ tcs.SetCanceled(ShutdownToken);
+ }
+ else
+ {
+ tcs.SetResult();
+ }
+
+ _rangeLocks.Remove((RingRange.Full, current.Version, tcs)); // Unlike UnlockRange, the lock is removed even when shutting down.
+ }
+ }
+
+ private sealed record class PartitionSnapshotState( // Entries evicted by ReleaseRangeAsync, retained for the partitions which take over the released range.
+ MembershipVersion DirectoryMembershipVersion, // Version of the view in which this partition still owned the entries.
+ List GrainAddresses, // The evicted directory entries.
+ HashSet<(SiloAddress SiloAddress, int PartitionIndex)> TransferPartners); // New owners of the released range, identified by silo and partition index.
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryResolver.cs b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryResolver.cs
index c608686e73..164dc6477e 100644
--- a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryResolver.cs
+++ b/src/Orleans.Runtime/GrainDirectory/GrainDirectoryResolver.cs
@@ -29,7 +29,7 @@ public GrainDirectoryResolver(
var services = serviceProvider.GetGrainDirectories();
foreach (var svc in services)
{
- this.directoryPerName.Add(svc.Name, serviceProvider.GetRequiredKeyedService(svc.Name));
+ this.directoryPerName[svc.Name] = serviceProvider.GetRequiredKeyedService(svc.Name);
}
this.directoryPerName.TryGetValue(GrainDirectoryAttribute.DEFAULT_GRAIN_DIRECTORY, out var defaultDirectory);
@@ -43,7 +43,7 @@ public GrainDirectoryResolver(
public IGrainDirectory Resolve(GrainType grainType) => this.directoryPerType.GetOrAdd(grainType, this.getGrainDirectoryInternal);
- public bool IsUsingDhtDirectory(GrainType grainType) => Resolve(grainType) == null;
+ public bool IsUsingDefaultDirectory(GrainType grainType) => Resolve(grainType) == null;
private IGrainDirectory GetGrainDirectoryPerType(GrainType grainType)
{
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainLocatorResolver.cs b/src/Orleans.Runtime/GrainDirectory/GrainLocatorResolver.cs
index c5e736ebb3..8b44041ca6 100644
--- a/src/Orleans.Runtime/GrainDirectory/GrainLocatorResolver.cs
+++ b/src/Orleans.Runtime/GrainDirectory/GrainLocatorResolver.cs
@@ -37,7 +37,7 @@ public IGrainLocator GetGrainLocatorInternal(GrainType grainType)
{
result = this._clientGrainLocator ??= _servicesProvider.GetRequiredService();
}
- else if (this.grainDirectoryResolver.IsUsingDhtDirectory(grainType))
+ else if (this.grainDirectoryResolver.IsUsingDefaultDirectory(grainType))
{
result = this.dhtGrainLocator;
}
diff --git a/src/Orleans.Runtime/GrainDirectory/IGrainDirectoryPartition.cs b/src/Orleans.Runtime/GrainDirectory/IGrainDirectoryPartition.cs
new file mode 100644
index 0000000000..f42df98812
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/IGrainDirectoryPartition.cs
@@ -0,0 +1,39 @@
+using System.Collections.Generic;
+using System.Threading.Tasks;
+using Orleans.Concurrency;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+[Alias("IGrainDirectoryReplica")]
+internal interface IGrainDirectoryPartition : ISystemTarget
+{ // Remote contract of a single grain directory partition. NOTE(review): 'version' is presumably the caller's directory membership version - confirm.
+ [Alias("RegisterAsync")]
+ ValueTask> RegisterAsync(MembershipVersion version, GrainAddress address, GrainAddress? currentRegistration);
+
+ [Alias("LookupAsync")]
+ ValueTask> LookupAsync(MembershipVersion version, GrainId grainId);
+
+ [Alias("DeregisterAsync")]
+ ValueTask> DeregisterAsync(MembershipVersion version, GrainAddress address);
+
+ [Alias("GetSnapshotAsync")] // Pulled by a range's new owner, passing its current view version, the previous view version and the acquired range.
+ ValueTask GetSnapshotAsync(MembershipVersion version, MembershipVersion rangeVersion, RingRange range);
+
+ [Alias("AcknowledgeSnapshotTransferAsync")] // Sent by the new owner (its silo and partition index) once it has received a snapshot.
+ ValueTask AcknowledgeSnapshotTransferAsync(SiloAddress silo, int partitionIndex, MembershipVersion version);
+}
+
+[Alias("IGrainDirectoryReplicaClient")]
+internal interface IGrainDirectoryClient : ISystemTarget
+{ // Queried on a silo's Constants.GrainDirectory system target during range recovery and integrity validation.
+ [Alias("GetRegisteredActivations")] // 'isValidation' is false for recovery and true for integrity checks.
+ ValueTask>> GetRegisteredActivations(MembershipVersion membershipVersion, RingRange range, bool isValidation);
+}
+
+[Alias("IGrainDirectoryReplicaTestHooks")]
+internal interface IGrainDirectoryTestHooks : ISystemTarget
+{ // Test-only hooks implemented by the directory partition.
+ [Alias("CheckIntegrityAsync")] // Compares the partition's entries against the activations reported by cluster members.
+ ValueTask CheckIntegrityAsync();
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs
index be5ec56ffc..48a0eff4c9 100644
--- a/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs
+++ b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectory.cs
@@ -34,7 +34,7 @@ internal sealed class LocalGrainDirectory : ILocalGrainDirectory, ISiloStatusLis
internal SiloAddress MyAddress { get; }
internal IGrainDirectoryCache DirectoryCache { get; }
- internal GrainDirectoryPartition DirectoryPartition { get; }
+ internal LocalGrainDirectoryPartition DirectoryPartition { get; }
public RemoteGrainDirectory RemoteGrainDirectory { get; }
public RemoteGrainDirectory CacheValidator { get; }
@@ -46,7 +46,7 @@ public LocalGrainDirectory(
ILocalSiloDetails siloDetails,
ISiloStatusOracle siloStatusOracle,
IInternalGrainFactory grainFactory,
- Factory grainDirectoryPartitionFactory,
+ Factory grainDirectoryPartitionFactory,
IOptions developmentClusterMembershipOptions,
IOptions grainDirectoryOptions,
ILoggerFactory loggerFactory)
diff --git a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartition.cs b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectoryPartition.cs
similarity index 97%
rename from src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartition.cs
rename to src/Orleans.Runtime/GrainDirectory/LocalGrainDirectoryPartition.cs
index b2a29753ca..f445055950 100644
--- a/src/Orleans.Runtime/GrainDirectory/GrainDirectoryPartition.cs
+++ b/src/Orleans.Runtime/GrainDirectory/LocalGrainDirectoryPartition.cs
@@ -102,7 +102,7 @@ public bool RemoveActivation(ActivationId act, UnregistrationCause cause, TimeSp
}
}
- internal sealed class GrainDirectoryPartition
+ internal sealed class LocalGrainDirectoryPartition
{
// Should we change this to SortedList<> or SortedDictionary so we can extract chunks better for shipping the full
// partition to a follower, or should we leave it as a Dictionary to get O(1) lookups instead of O(log n), figuring we do
@@ -118,11 +118,11 @@ internal sealed class GrainDirectoryPartition
internal int Count { get { return partitionData.Count; } }
- public GrainDirectoryPartition(ISiloStatusOracle siloStatusOracle, IOptions grainDirectoryOptions, ILoggerFactory loggerFactory)
+ public LocalGrainDirectoryPartition(ISiloStatusOracle siloStatusOracle, IOptions grainDirectoryOptions, ILoggerFactory loggerFactory)
{
partitionData = new Dictionary();
lockable = new object();
- log = loggerFactory.CreateLogger();
+ log = loggerFactory.CreateLogger();
this.siloStatusOracle = siloStatusOracle;
this.grainDirectoryOptions = grainDirectoryOptions;
}
@@ -260,7 +260,7 @@ internal int GetGrainETag(GrainId grain)
///
///
/// Activations which must be deactivated.
- internal Dictionary>? Merge(GrainDirectoryPartition other)
+ internal Dictionary>? Merge(LocalGrainDirectoryPartition other)
{
Dictionary>? activationsToRemove = null;
lock (lockable)
diff --git a/src/Orleans.Runtime/GrainDirectory/RemoteGrainDirectory.cs b/src/Orleans.Runtime/GrainDirectory/RemoteGrainDirectory.cs
index 3dfe2eb20c..05b418c1da 100644
--- a/src/Orleans.Runtime/GrainDirectory/RemoteGrainDirectory.cs
+++ b/src/Orleans.Runtime/GrainDirectory/RemoteGrainDirectory.cs
@@ -11,7 +11,7 @@ namespace Orleans.Runtime.GrainDirectory
internal sealed class RemoteGrainDirectory : SystemTarget, IRemoteGrainDirectory
{
private readonly LocalGrainDirectory router;
- private readonly GrainDirectoryPartition partition;
+ private readonly LocalGrainDirectoryPartition partition;
private readonly ILogger logger;
internal RemoteGrainDirectory(LocalGrainDirectory r, GrainType grainType, ILoggerFactory loggerFactory)
diff --git a/src/Orleans.Runtime/GrainDirectory/RingRange.cs b/src/Orleans.Runtime/GrainDirectory/RingRange.cs
new file mode 100644
index 0000000000..1b583a2317
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/RingRange.cs
@@ -0,0 +1,241 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+/// <summary>
+/// Represents a contiguous range of zero or more values on a ring.
+/// </summary>
+[GenerateSerializer, Immutable, Alias(nameof(RingRange))]
+internal readonly struct RingRange : IEquatable<RingRange>, ISpanFormattable, IComparable<uint>
+{
+    // The exclusive starting point for the range.
+    // Note that _start == _end == 1 is used as a special value to represent a full range.
+    [Id(0)]
+    private readonly uint _start;
+
+    // The inclusive ending point for the range.
+    // Note that _start == _end == 1 is used as a special value to represent a full range.
+    [Id(1)]
+    private readonly uint _end;
+
+    public bool IsEmpty => _start == _end && _start == 0; // (0, 0) is the empty range.
+
+    public bool IsFull => _start == _end && _start != 0; // The constructor stores every full range as (1, 1).
+
+    // Whether the range crosses the uint.MaxValue -> 0 boundary of the ring. The full range counts as wrapped.
+    internal bool IsWrapped => _start >= _end && _start != 0;
+
+    public static RingRange Full { get; } = new (1, 1);
+
+    public static RingRange Empty { get; } = new (0, 0);
+
+    public uint Start => IsFull ? 0 : _start; // The full range is reported as (0, 0].
+
+    public uint End => IsFull ? 0 : _end;
+
+    private RingRange(uint start, uint end)
+    {
+        _start = start == end && start > 1 ? 1 : start; // Any start == end pair above 1 also means "full": normalize it to (1, 1).
+        _end = start == end && start > 1 ? 1 : end;
+    }
+
+    // For internal use only.
+    internal static RingRange Create(uint start, uint end) => new (start, end);
+
+    /// <summary>
+    /// Creates a range representing a single point.
+    /// </summary>
+    /// <param name="point">The point which the range will include.</param>
+    /// <returns>A range including only <paramref name="point"/>.</returns>
+    public static RingRange FromPoint(uint point) => new (unchecked(point - 1), point);
+
+    /// <summary>
+    /// Gets the size of the range.
+    /// </summary>
+    public uint Size
+    {
+        get
+        {
+            if (_start == _end)
+            {
+                // Empty
+                if (_start == 0) return 0;
+
+                // Full: the ring holds 2^32 values, which a uint cannot represent, so uint.MaxValue is reported.
+                return uint.MaxValue;
+            }
+
+            // Normal
+            if (_end > _start) return _end - _start;
+
+            // Wrapped. NOTE(review): like the full case, this is one less than the number of values covered.
+            return uint.MaxValue - _start + _end;
+        }
+    }
+
+    public int CompareTo(uint point)
+    { // Returns 0 when the point is contained; otherwise a sign telling a sorted search which way to continue.
+        if (Contains(point))
+        {
+            return 0;
+        }
+
+        var start = Start;
+        if (IsWrapped)
+        {
+            // Start > End (wrap-around case): a point which is not contained lies in the gap (End, Start].
+            if (point <= start)
+            {
+                // NOTE(review): the non-wrapped case returns 1 for 'point <= start'; presumably the ring search expects -1 here - confirm.
+                return -1;
+            }
+
+            // Not reachable: a point above Start is contained in a wrapped range,
+            // and that case was handled by the Contains check above.
+            return 1;
+        }
+
+        if (point <= start)
+        {
+            // Range starts after N (range > N)
+            return 1;
+        }
+
+        // n > _end
+        // Range starts & ends before N (range < N)
+        return -1;
+    }
+
+    /// <summary>
+    /// Checks if the uniform hash code of the grain id is an element of (Start, End], while remembering that the ranges are on a ring
+    /// </summary>
+    /// <returns>true if the hash code is in (Start, End], false otherwise</returns>
+    internal bool Contains(GrainId grainId) => Contains(grainId.GetUniformHashCode());
+
+    /// <summary>
+    /// Checks if the point is an element of (Start, End], while remembering that the ranges are on a ring
+    /// </summary>
+    /// <param name="point">The value to test.</param>
+    /// <returns>true if the point is in (Start, End], false otherwise</returns>
+    public bool Contains(uint point)
+    {
+        if (IsEmpty)
+        {
+            return false;
+        }
+
+        // Normal (non-wrapped) range: a plain interval test.
+        if (Start < End)
+        {
+            return point > Start && point <= End;
+        }
+
+        // Wrapped (Start > End) or full (Start == End == 0): everything outside the gap (End, Start] is contained.
+        return point > Start || point <= End;
+    }
+
+    public float SizePercent => Size * (100.0f / uint.MaxValue);
+
+    public bool Equals(RingRange other) => _start == other._start && _end == other._end;
+
+    public override bool Equals(object? obj) => obj is RingRange other && Equals(other);
+
+    public override int GetHashCode() => HashCode.Combine(_start, _end);
+
+    public override string ToString() => $"{this}"; // Interpolation formats 'this' through ISpanFormattable.TryFormat below.
+
+    string IFormattable.ToString(string? format, IFormatProvider? formatProvider) => ToString();
+
+    bool ISpanFormattable.TryFormat(Span<char> destination, out int charsWritten, ReadOnlySpan<char> format, IFormatProvider? provider)
+    {
+        return IsEmpty
+            ? destination.TryWrite($"(0, 0) 0.00%", out charsWritten)
+            : IsFull
+                ? destination.TryWrite($"(0, 0] (100.00%)", out charsWritten)
+                : destination.TryWrite($"(0x{Start:X8}, 0x{End:X8}] ({SizePercent:0.00}%)", out charsWritten);
+    }
+
+    public bool Intersects(RingRange other) => !IsEmpty && !other.IsEmpty && (Equals(other) || Contains(other.End) || other.Contains(End));
+
+    internal RingRange Complement()
+    {
+        if (IsEmpty)
+        {
+            return Full;
+        }
+
+        if (IsFull)
+        {
+            return Empty;
+        }
+
+        return new RingRange(End, Start); // Swapping the end points of (Start, End] gives (End, Start].
+    }
+
+    internal IEnumerable<RingRange> Intersections(RingRange other)
+    {
+        if (!Intersects(other))
+        {
+            // No intersections.
+            yield break;
+        }
+
+        if (IsFull)
+        {
+            // One intersection, the other range.
+            yield return other;
+        }
+        else if (other.IsFull)
+        {
+            yield return this;
+        }
+        else if (IsWrapped ^ other.IsWrapped)
+        {
+            var wrapped = IsWrapped ? this : other;
+            var normal = IsWrapped ? other : this;
+            var (normalStart, normalEnd) = (normal.Start, normal.End);
+            var (wrappedStart, wrappedEnd) = (wrapped.Start, wrapped.End);
+
+            // There are possibly two intersections, between the normal and wrapped range.
+            // low high
+            // ...---NB====WE----WB====NE----...
+
+            // Intersection at the low side.
+            if (wrappedEnd > normalStart)
+            {
+                // ---NB====WE--- (clamped to NE when the wrapped range extends beyond the end of the normal range)
+                yield return new RingRange(normalStart, Math.Min(wrappedEnd, normalEnd));
+            }
+
+            // Intersection at the high side.
+            if (wrappedStart < normalEnd)
+            {
+                // ---WB====NE--- (clamped to NB when the wrapped range starts before the normal range)
+                yield return new RingRange(Math.Max(wrappedStart, normalStart), normalEnd);
+            }
+        }
+        else
+        { // Both normal or both wrapped. NOTE(review): two wrapped ranges can overlap in two pieces; only one is returned here.
+            yield return new RingRange(Math.Max(Start, other.Start), Math.Min(End, other.End));
+        }
+    }
+
+    // Gets the set difference: the sub-ranges which are in this range but are not in the 'other' range.
+    internal IEnumerable<RingRange> Difference(RingRange other)
+    {
+        // Additions are the intersections between this range and the inverse of the previous range.
+        foreach (var addition in Intersections(other.Complement()))
+        {
+            Debug.Assert(!addition.Intersects(other));
+            Debug.Assert(addition.Intersects(this));
+            yield return addition;
+        }
+    }
+
+    public static bool operator ==(RingRange left, RingRange right) => left.Equals(right);
+
+    public static bool operator !=(RingRange left, RingRange right) => !(left == right);
+}
diff --git a/src/Orleans.Runtime/GrainDirectory/RingRangeCollection.cs b/src/Orleans.Runtime/GrainDirectory/RingRangeCollection.cs
new file mode 100644
index 0000000000..0772d2784e
--- /dev/null
+++ b/src/Orleans.Runtime/GrainDirectory/RingRangeCollection.cs
@@ -0,0 +1,224 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Collections.Immutable;
+using System.Diagnostics;
+using System.Linq;
+using Orleans.Runtime.Utilities;
+
+#nullable enable
+namespace Orleans.Runtime.GrainDirectory;
+
+// Read-only, sorted collection of non-overlapping ranges. A default (uninitialized) instance is treated as empty by IsEmpty, Equals and GetHashCode.
+[GenerateSerializer, Immutable, Alias(nameof(RingRangeCollection))]
+internal readonly struct RingRangeCollection : IEquatable<RingRangeCollection>, ISpanFormattable, IEnumerable<RingRange>
+{
+    public RingRangeCollection(ImmutableArray<RingRange> ranges)
+    {
+#if DEBUG
+        Debug.Assert(!ranges.IsDefault);
+
+        // Ranges must be in sorted order and must not overlap with each other.
+        for (var i = 1; i < ranges.Length; i++)
+        {
+            var prev = ranges[i - 1];
+            var curr = ranges[i];
+            Debug.Assert(!curr.IsEmpty);
+            Debug.Assert(!prev.Intersects(curr));
+            Debug.Assert(curr.Start >= prev.Start);
+        }
+
+        if (ranges.Length > 1)
+        {
+            Debug.Assert(!ranges[0].Intersects(ranges[^1]));
+        }
+#endif
+        Ranges = ranges;
+    }
+
+    public static RingRangeCollection Create<TCollection>(TCollection ranges) where TCollection : ICollection<RingRange>
+    { // Builds a collection from the non-empty ranges in 'ranges', sorted by starting point.
+        ArgumentNullException.ThrowIfNull(ranges);
+        var result = ImmutableArray.CreateBuilder<RingRange>(ranges.Count);
+        foreach (var range in ranges)
+        {
+            if (range.IsEmpty)
+            {
+                continue;
+            }
+
+            result.Add(range);
+        }
+
+        result.Sort((l, r) => l.Start.CompareTo(r.Start));
+        return new(result.ToImmutable());
+    }
+
+    public static RingRangeCollection Empty { get; } = new([]);
+
+    [Id(0)]
+    public ImmutableArray<RingRange> Ranges { get; }
+
+    public bool IsDefault => Ranges.IsDefault;
+
+    public bool IsEmpty => Ranges.IsDefaultOrEmpty || Ranges.All(r => r.IsEmpty);
+
+    public bool IsFull => !IsEmpty && Ranges.Sum(r => r.Size) == uint.MaxValue;
+
+    public uint Size => (uint)Ranges.Sum(static r => r.Size);
+
+    public float SizePercent => Size * (100.0f / uint.MaxValue);
+
+    public bool Contains(GrainId grainId) => Contains(grainId.GetUniformHashCode());
+
+    public bool Contains(uint value)
+    {
+        return SearchAlgorithms.RingRangeBinarySearch(
+            Ranges.Length,
+            Ranges,
+            static (ranges, index) => ranges[index],
+            value) >= 0;
+    }
+
+    public bool Intersects(RingRange other)
+    {
+        if (IsEmpty || other.IsEmpty)
+        {
+            return false;
+        }
+
+        if (Contains(other.End))
+        {
+            return true;
+        }
+
+        foreach (var range in Ranges)
+        {
+            if (other.Contains(range.End))
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    public bool Intersects(RingRangeCollection other)
+    {
+        if (IsEmpty || other.IsEmpty)
+        {
+            return false;
+        }
+
+        foreach (var range in Ranges)
+        {
+            if (other.Contains(range.End))
+            {
+                return true;
+            }
+        }
+
+        foreach (var otherRange in other.Ranges)
+        {
+            if (Contains(otherRange.End))
+            {
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    public RingRangeCollection Difference(RingRangeCollection previous)
+    {
+        // Ranges in this collection must not overlap with each other.
+        // Ranges in 'previous' must not overlap with each other.
+        // Corresponding ranges in this collection and 'previous' have the same starting points.
+        // The number of ranges in both 'Ranges' or 'previous.Ranges' is either zero or the configured number of ranges,
+        // i.e., if both collections have more than zero ranges, they both have the same number of ranges.
+        if (Ranges.Length == previous.Ranges.Length)
+        {
+            var result = ImmutableArray.CreateBuilder<RingRange>(Ranges.Length);
+            for (var i = 0; i < Ranges.Length; i++)
+            {
+                var c = Ranges[i];
+                var p = previous.Ranges[i];
+                Debug.Assert(c.Start == p.Start);
+                if (c.Size > p.Size)
+                {
+                    result.Add(RingRange.Create(p.End, c.End)); // The range grew at its end: the growth is (p.End, c.End].
+                }
+            }
+
+            // If the last range wrapped around but its extension does not wrap around, move it to the front.
+            // This preserves sort order.
+            if (result.Count > 1 && result[^1].Start < result[^2].Start)
+            {
+                var last = result[^1];
+                result.RemoveAt(result.Count - 1);
+                result.Insert(0, last);
+            }
+
+            return new(result.ToImmutable());
+        }
+        else
+        {
+            if (Ranges.Length > previous.Ranges.Length)
+            {
+                Debug.Assert(previous.Ranges.Length == 0);
+                return this;
+            }
+            else
+            {
+                Debug.Assert(Ranges.Length == 0 ^ previous.Ranges.Length == 0);
+                return Empty;
+            }
+        }
+    }
+
+    public bool Equals(RingRangeCollection other)
+    {
+        if (IsEmpty && other.IsEmpty)
+        {
+            return true;
+        }
+
+        if (IsEmpty ^ other.IsEmpty)
+        {
+            return false;
+        }
+
+        return Ranges.SequenceEqual(other.Ranges);
+    }
+
+    public static bool operator ==(RingRangeCollection left, RingRangeCollection right) => left.Equals(right);
+
+    public static bool operator !=(RingRangeCollection left, RingRangeCollection right) => !(left == right);
+
+    public override bool Equals(object? obj) => obj is RingRangeCollection range && Equals(range);
+
+    public override int GetHashCode()
+    {
+        var result = new HashCode();
+        result.Add(Ranges.IsDefault ? 0 : Ranges.Length); // Reading Length on a default ImmutableArray throws, so a default instance hashes like an empty one.
+        if (!Ranges.IsDefaultOrEmpty)
+        {
+            foreach (var range in Ranges)
+            {
+                result.Add(range);
+            }
+        }
+
+        return result.ToHashCode();
+    }
+
+    public ImmutableArray<RingRange>.Enumerator GetEnumerator() => Ranges.GetEnumerator();
+
+    public override string ToString() => $"{this}";
+    string IFormattable.ToString(string? format, IFormatProvider? formatProvider) => ToString();
+
+    bool ISpanFormattable.TryFormat(Span<char> destination, out int charsWritten, ReadOnlySpan<char> format, IFormatProvider? provider)
+        => destination.TryWrite($"({Ranges.Length} subranges), {SizePercent:0.00}%", out charsWritten);
+    IEnumerator<RingRange> IEnumerable<RingRange>.GetEnumerator() => ((IEnumerable<RingRange>)Ranges).GetEnumerator();
+    IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)Ranges).GetEnumerator();
+}
\ No newline at end of file
diff --git a/src/Orleans.Runtime/Hosting/CoreHostingExtensions.cs b/src/Orleans.Runtime/Hosting/CoreHostingExtensions.cs
index de51a41b56..4d95b623cc 100644
--- a/src/Orleans.Runtime/Hosting/CoreHostingExtensions.cs
+++ b/src/Orleans.Runtime/Hosting/CoreHostingExtensions.cs
@@ -1,12 +1,17 @@
+#nullable enable
using System;
using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
using System.Net;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.DependencyInjection.Extensions;
using Microsoft.Extensions.Options;
using Orleans.Configuration;
using Orleans.Configuration.Internal;
+using Orleans.GrainDirectory;
using Orleans.Runtime;
+using Orleans.Runtime.GrainDirectory;
+using Orleans.Runtime.Hosting;
using Orleans.Runtime.MembershipService;
namespace Orleans.Hosting
@@ -16,6 +21,8 @@ namespace Orleans.Hosting
///
public static class CoreHostingExtensions
{
+ private static readonly ServiceDescriptor DirectoryDescriptor = ServiceDescriptor.Singleton();
+
///
/// Add propagation through grain calls.
/// Note: according to activity will be created only when any listener for activity exists and returns .
@@ -47,7 +54,7 @@ public static ISiloBuilder UseLocalhostClustering(
this ISiloBuilder builder,
int siloPort = EndpointOptions.DEFAULT_SILO_PORT,
int gatewayPort = EndpointOptions.DEFAULT_GATEWAY_PORT,
- IPEndPoint primarySiloEndpoint = null,
+ IPEndPoint? primarySiloEndpoint = null,
string serviceId = ClusterOptions.DevelopmentServiceId,
string clusterId = ClusterOptions.DevelopmentClusterId)
{
@@ -127,7 +134,7 @@ public static ISiloBuilder UseDevelopmentClustering(
});
}
- private static void ConfigurePrimarySiloEndpoint(OptionsBuilder optionsBuilder, IPEndPoint primarySiloEndpoint)
+ private static void ConfigurePrimarySiloEndpoint(OptionsBuilder optionsBuilder, IPEndPoint? primarySiloEndpoint)
{
optionsBuilder.Configure((DevelopmentClusterMembershipOptions options, IOptions endpointOptions) =>
{
@@ -139,5 +146,31 @@ private static void ConfigurePrimarySiloEndpoint(OptionsBuilder
+ /// <summary>
+ /// Opts in to the experimental distributed grain directory.
+ /// </summary>
+ /// <param name="siloBuilder">The silo builder to register the directory implementation with.</param>
+ /// <param name="name">The name of the directory to register, or <see langword="null"/> to register the directory as the default.</param>
+ /// <returns>The provided silo builder.</returns>
+        [Experimental("ORLEANSEXP002")]
+        public static ISiloBuilder AddDistributedGrainDirectory(this ISiloBuilder siloBuilder, string? name = null)
+        {
+            var services = siloBuilder.Services;
+
+            // A null or empty name selects the default grain directory.
+            var directoryName = string.IsNullOrEmpty(name)
+                ? GrainDirectoryAttribute.DEFAULT_GRAIN_DIRECTORY
+                : name;
+
+            // Distributed Grain Directory
+            services.TryAddSingleton();
+
+            // DirectoryDescriptor doubles as a marker: when it is already present, nothing further is registered.
+            if (services.Contains(DirectoryDescriptor))
+            {
+                return siloBuilder;
+            }
+
+            services.Add(DirectoryDescriptor);
+            services.AddGrainDirectory(directoryName, (sp, _) => sp.GetRequiredService());
+            return siloBuilder;
+        }
}
}
\ No newline at end of file
diff --git a/src/Orleans.Runtime/Hosting/DefaultSiloServices.cs b/src/Orleans.Runtime/Hosting/DefaultSiloServices.cs
index 8f9d8892f3..b8b3d7e6de 100644
--- a/src/Orleans.Runtime/Hosting/DefaultSiloServices.cs
+++ b/src/Orleans.Runtime/Hosting/DefaultSiloServices.cs
@@ -193,7 +193,7 @@ internal static void AddDefaultServices(ISiloBuilder builder)
services.TryAddSingleton();
- services.TryAddSingleton(FactoryUtility.Create);
+ services.TryAddSingleton(FactoryUtility.Create);
// Placement
services.AddSingleton();
diff --git a/src/Orleans.Runtime/MembershipService/ClusterMembershipService.cs b/src/Orleans.Runtime/MembershipService/ClusterMembershipService.cs
index 80da556090..db926a662f 100644
--- a/src/Orleans.Runtime/MembershipService/ClusterMembershipService.cs
+++ b/src/Orleans.Runtime/MembershipService/ClusterMembershipService.cs
@@ -50,25 +50,27 @@ public ClusterMembershipSnapshot CurrentSnapshot
public IAsyncEnumerable MembershipUpdates => this.updates;
- public ValueTask Refresh(MembershipVersion targetVersion)
+ public ValueTask Refresh(MembershipVersion targetVersion) => Refresh(targetVersion, CancellationToken.None);
+ public ValueTask Refresh(MembershipVersion targetVersion, CancellationToken cancellationToken)
{
if (targetVersion != default && targetVersion != MembershipVersion.MinValue && this.snapshot.Version >= targetVersion)
return default;
- return RefreshAsync(targetVersion);
+ return RefreshAsync(targetVersion, cancellationToken);
- async ValueTask RefreshAsync(MembershipVersion v)
+ async ValueTask RefreshAsync(MembershipVersion v, CancellationToken cancellationToken)
{
var didRefresh = false;
do
{
+ cancellationToken.ThrowIfCancellationRequested();
if (!didRefresh || this.membershipTableManager.MembershipTableSnapshot.Version < v)
{
await this.membershipTableManager.Refresh();
didRefresh = true;
}
- await Task.Delay(TimeSpan.FromMilliseconds(10));
+ await Task.Delay(TimeSpan.FromMilliseconds(10), cancellationToken);
} while (this.snapshot.Version < v || this.snapshot.Version < this.membershipTableManager.MembershipTableSnapshot.Version);
}
}
diff --git a/src/Orleans.Runtime/MembershipService/ClusterMembershipSnapshot.cs b/src/Orleans.Runtime/MembershipService/ClusterMembershipSnapshot.cs
index 90b91e3e57..55c227950f 100644
--- a/src/Orleans.Runtime/MembershipService/ClusterMembershipSnapshot.cs
+++ b/src/Orleans.Runtime/MembershipService/ClusterMembershipSnapshot.cs
@@ -21,6 +21,8 @@ public ClusterMembershipSnapshot(ImmutableDictionary
this.Version = version;
}
+ /// <summary>
+ /// Gets a snapshot constructed from an empty dictionary and <see cref="MembershipVersion.MinValue"/>. Each access creates a new instance.
+ /// </summary>
+ internal static ClusterMembershipSnapshot Default => new(ImmutableDictionary.Empty, MembershipVersion.MinValue);
+
///
/// Gets the cluster members.
///
diff --git a/src/Orleans.Runtime/MembershipService/InMemoryMembershipTable.cs b/src/Orleans.Runtime/MembershipService/InMemoryMembershipTable.cs
index bd75b3f176..29059b391a 100644
--- a/src/Orleans.Runtime/MembershipService/InMemoryMembershipTable.cs
+++ b/src/Orleans.Runtime/MembershipService/InMemoryMembershipTable.cs
@@ -32,7 +32,7 @@ public MembershipTableData Read(SiloAddress key)
public MembershipTableData ReadAll()
{
- return new MembershipTableData(siloTable.Values.Select(tuple =>
+ return new MembershipTableData(siloTable.Values.Select(tuple =>
new Tuple(this.deepCopier.Copy(tuple.Item1), tuple.Item2)).ToList(), tableVersion);
}
@@ -47,7 +47,7 @@ public bool Insert(MembershipEntry entry, TableVersion version)
siloTable.TryGetValue(entry.SiloAddress, out data);
if (data != null) return false;
if (!tableVersion.VersionEtag.Equals(version.VersionEtag)) return false;
-
+
siloTable[entry.SiloAddress] = new Tuple(
entry, lastETagCounter++.ToString(CultureInfo.InvariantCulture));
tableVersion = new TableVersion(version.Version, NewETag());
@@ -60,7 +60,7 @@ public bool Update(MembershipEntry entry, string etag, TableVersion version)
siloTable.TryGetValue(entry.SiloAddress, out data);
if (data == null) return false;
if (!data.Item2.Equals(etag) || !tableVersion.VersionEtag.Equals(version.VersionEtag)) return false;
-
+
siloTable[entry.SiloAddress] = new Tuple(
entry, lastETagCounter++.ToString(CultureInfo.InvariantCulture));
tableVersion = new TableVersion(version.Version, NewETag());
@@ -83,5 +83,23 @@ private string NewETag()
{
return lastETagCounter++.ToString(CultureInfo.InvariantCulture);
}
+
+        /// <summary>
+        /// Removes every entry whose status is <see cref="SiloStatus.Dead"/> and whose most recent recorded time is earlier than <paramref name="beforeDate"/>.
+        /// </summary>
+        /// <param name="beforeDate">
+        /// The cutoff. A dead entry is removed when the later of its <c>IAmAliveTime</c> and <c>StartTime</c>, taken as a UTC time, precedes this value.
+        /// </param>
+        public void CleanupDefunctSiloEntries(DateTimeOffset beforeDate)
+        {
+            // Collect the keys first and remove them afterwards, so that the table is not modified while it is being enumerated.
+            var defunctSilos = siloTable
+                .Where(pair =>
+                {
+                    var entry = pair.Value.Item1;
+                    if (entry.Status != SiloStatus.Dead)
+                    {
+                        return false;
+                    }
+
+                    var lastSeenTicks = Math.Max(entry.IAmAliveTime.Ticks, entry.StartTime.Ticks);
+                    return new DateTime(lastSeenTicks, DateTimeKind.Utc) < beforeDate;
+                })
+                .Select(pair => pair.Key)
+                .ToList();
+
+            foreach (var silo in defunctSilos)
+            {
+                siloTable.Remove(silo);
+            }
+        }
}
}
diff --git a/src/Orleans.Runtime/MembershipService/LocalSiloHealthMonitor.cs b/src/Orleans.Runtime/MembershipService/LocalSiloHealthMonitor.cs
index 0a93c7058e..1f6a639796 100644
--- a/src/Orleans.Runtime/MembershipService/LocalSiloHealthMonitor.cs
+++ b/src/Orleans.Runtime/MembershipService/LocalSiloHealthMonitor.cs
@@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Collections.Immutable;
+using System.Diagnostics;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;
diff --git a/src/Orleans.Runtime/MembershipService/SystemTargetBasedMembershipTable.cs b/src/Orleans.Runtime/MembershipService/SystemTargetBasedMembershipTable.cs
index eb77a9bdae..c932dc6bcb 100644
--- a/src/Orleans.Runtime/MembershipService/SystemTargetBasedMembershipTable.cs
+++ b/src/Orleans.Runtime/MembershipService/SystemTargetBasedMembershipTable.cs
@@ -4,12 +4,9 @@
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
-using Orleans;
using Orleans.Concurrency;
using Orleans.Configuration;
-using Orleans.Hosting;
using Orleans.Internal;
-using Orleans.Providers;
using Orleans.Serialization;
namespace Orleans.Runtime.MembershipService
@@ -105,10 +102,7 @@ private async Task WaitForTableGrainToInit(IMembershipTableSystemTarget membersh
public Task UpdateIAmAlive(MembershipEntry entry) => this.grain.UpdateIAmAlive(entry);
- public Task CleanupDefunctSiloEntries(DateTimeOffset beforeDate)
- {
- throw new NotImplementedException();
- }
+ public Task CleanupDefunctSiloEntries(DateTimeOffset beforeDate) => this.grain.CleanupDefunctSiloEntries(beforeDate);
}
[Reentrant]
@@ -199,7 +193,8 @@ public Task UpdateIAmAlive(MembershipEntry entry)
public Task CleanupDefunctSiloEntries(DateTimeOffset beforeDate)
{
- throw new NotImplementedException();
+ table.CleanupDefunctSiloEntries(beforeDate);
+ return Task.CompletedTask;
}
void ILifecycleParticipant.Participate(ISiloLifecycle lifecycle)
@@ -207,4 +202,4 @@ void ILifecycleParticipant.Participate(ISiloLifecycle lifecycle)
// Do nothing, just ensure that this instance is created so that it can register itself in the catalog.
}
}
-}
\ No newline at end of file
+}
diff --git a/src/Orleans.Runtime/Messaging/MessageCenter.cs b/src/Orleans.Runtime/Messaging/MessageCenter.cs
index ddf8539414..1c20347ce0 100644
--- a/src/Orleans.Runtime/Messaging/MessageCenter.cs
+++ b/src/Orleans.Runtime/Messaging/MessageCenter.cs
@@ -178,8 +178,8 @@ public void SendMessage(Message msg)
if (msg.TargetSilo is not { } targetSilo)
{
- log.LogError((int)ErrorCode.Runtime_Error_100113, "Message does not have a target silo: " + msg + " -- Call stack is: " + Utils.GetStackTrace());
- SendRejection(msg, Message.RejectionTypes.Unrecoverable, "Message to be sent does not have a target silo");
+ log.LogError((int)ErrorCode.Runtime_Error_100113, "Message does not have a target silo: '{Message}'. Call stack: {StackTrace}", msg, Utils.GetStackTrace());
+ SendRejection(msg, Message.RejectionTypes.Unrecoverable, "Message to be sent does not have a target silo.");
return;
}
@@ -198,13 +198,6 @@ public void SendMessage(Message msg)
}
else
{
- if (stopped)
- {
- log.LogInformation((int)ErrorCode.Runtime_Error_100115, "Message was queued for sending after outbound queue was stopped: {Message}", msg);
- SendRejection(msg, Message.RejectionTypes.Unrecoverable, "Message was queued for sending after outbound queue was stopped");
- return;
- }
-
if (this.connectionManager.TryGetConnection(targetSilo, out var existingConnection))
{
existingConnection.Send(msg);
@@ -213,8 +206,12 @@ public void SendMessage(Message msg)
else if (this.siloStatusOracle.IsDeadSilo(targetSilo))
{
// Do not try to establish
- this.messagingTrace.OnRejectSendMessageToDeadSilo(_siloAddress, msg);
- this.SendRejection(msg, Message.RejectionTypes.Transient, "Target silo is known to be dead");
+ if (msg.Direction is Message.Directions.Request or Message.Directions.OneWay)
+ {
+ this.messagingTrace.OnRejectSendMessageToDeadSilo(_siloAddress, msg);
+ this.SendRejection(msg, Message.RejectionTypes.Transient, "Target silo is known to be dead", new SiloUnavailableException());
+ }
+
return;
}
else
@@ -373,6 +370,7 @@ private void TryForwardRequest(Message message, GrainAddress? oldAddress, GrainA
message.AddToCacheInvalidationHeader(oldAddress, validAddress: destination);
}
+ if (log.IsEnabled(LogLevel.Debug)) log.LogDebug(exc, "Forwarding {Message} to '{ForwardingAddress}' after '{FailedOperation}'", message, forwardingAddress, failedOperation);
forwardingSucceeded = this.TryForwardMessage(message, forwardingAddress);
}
catch (Exception exc2)
@@ -422,6 +420,7 @@ private bool TryForwardMessage(Message message, SiloAddress? forwardingAddress)
message.ForwardCount = message.ForwardCount + 1;
MessagingProcessingInstruments.OnDispatcherMessageForwared(message);
+
ResendMessageImpl(message, forwardingAddress);
return true;
}
@@ -574,7 +573,7 @@ private void ProcessMessageToNonExistentActivation(Message msg)
{
MessagingInstruments.OnRejectedMessage(msg);
this.log.LogWarning(
- (int) ErrorCode.MessagingMessageFromUnknownActivation,
+ (int)ErrorCode.MessagingMessageFromUnknownActivation,
"Received a message {Message} for an unknown SystemTarget: {Target}",
msg,
msg.TargetGrain);
@@ -593,17 +592,20 @@ private void ProcessMessageToNonExistentActivation(Message msg)
else
{
// Activation does not exists and is not a new placement.
- log.LogInformation(
- (int)ErrorCode.Dispatcher_Intermediate_GetOrCreateActivation,
- "Intermediate NonExistentActivation for message {Message}",
- msg);
+ if (log.IsEnabled(LogLevel.Debug))
+ {
+ log.LogDebug(
+ (int)ErrorCode.Dispatcher_Intermediate_GetOrCreateActivation,
+ "Unable to create local activation for message {Message}.",
+ msg);
+ }
- var nonExistentActivation = new GrainAddress { SiloAddress = msg.TargetSilo, GrainId = msg.TargetGrain };
- ProcessRequestToInvalidActivation(msg, nonExistentActivation, null, "Non-existent activation");
+ var partialAddress = new GrainAddress { SiloAddress = msg.TargetSilo, GrainId = msg.TargetGrain };
+ ProcessRequestToInvalidActivation(msg, partialAddress, null, "Unable to create local activation");
}
}
- internal void SendRejection(Message msg, Message.RejectionTypes rejectionType, string reason)
+ internal void SendRejection(Message msg, Message.RejectionTypes rejectionType, string reason, Exception? exception = null)
{
MessagingInstruments.OnRejectedMessage(msg);
@@ -616,7 +618,7 @@ internal void SendRejection(Message msg, Message.RejectionTypes rejectionType, s
else
{
if (string.IsNullOrEmpty(reason)) reason = $"Rejection from silo {this._siloAddress} - Unknown reason.";
- var error = this.messageFactory.CreateRejectionResponse(msg, rejectionType, reason);
+ var error = this.messageFactory.CreateRejectionResponse(msg, rejectionType, reason, exception);
// rejection msgs are always originated in the local silo, they are never remote.
this.ReceiveMessage(error);
}
diff --git a/src/Orleans.Runtime/Networking/GatewayInboundConnection.cs b/src/Orleans.Runtime/Networking/GatewayInboundConnection.cs
index b3e62b279b..ffd4799163 100644
--- a/src/Orleans.Runtime/Networking/GatewayInboundConnection.cs
+++ b/src/Orleans.Runtime/Networking/GatewayInboundConnection.cs
@@ -172,7 +172,8 @@ public void FailMessage(Message msg, string reason)
this.messageCenter.SendRejection(
msg,
Message.RejectionTypes.Transient,
- $"Silo {this.myAddress} is rejecting message: {msg}. Reason = {reason}");
+ $"Silo {this.myAddress} is rejecting message: {msg}. Reason = {reason}",
+ new SiloUnavailableException());
}
else
{
diff --git a/src/Orleans.Runtime/Networking/SiloConnection.cs b/src/Orleans.Runtime/Networking/SiloConnection.cs
index 7580107287..50916c4988 100644
--- a/src/Orleans.Runtime/Networking/SiloConnection.cs
+++ b/src/Orleans.Runtime/Networking/SiloConnection.cs
@@ -6,6 +6,7 @@
using System.Text;
using System.Threading.Tasks;
using Microsoft.AspNetCore.Connections;
+using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Orleans.Configuration;
using Orleans.Messaging;
@@ -96,7 +97,7 @@ protected override void OnReceivedMessage(Message msg)
}
MessagingInstruments.OnRejectedMessage(msg);
- var rejection = this.MessageFactory.CreateRejectionResponse(msg, Message.RejectionTypes.Unrecoverable, "Silo stopping");
+ var rejection = this.MessageFactory.CreateRejectionResponse(msg, Message.RejectionTypes.Unrecoverable, "Silo stopping", new SiloUnavailableException());
this.Send(rejection);
return;
}
@@ -197,7 +198,7 @@ protected override async Task RunInternal()
}
finally
{
- if (!(this.RemoteSiloAddress is null))
+ if (this.RemoteSiloAddress is not null)
{
this.connectionManager.OnConnectionTerminated(this.RemoteSiloAddress, this, error);
}
@@ -243,11 +244,11 @@ protected override bool PrepareMessageForSend(Message msg)
// Don't send messages that have already timed out
if (msg.IsExpired)
{
- this.MessagingTrace.OnDropExpiredMessage(msg, MessagingInstruments.Phase.Send);
+ this.MessagingTrace.OnDropExpiredMessage(msg, MessagingInstruments.Phase.Send);
if (msg.IsPing())
{
- this.Log.LogWarning("Droppping expired ping message {Message}", msg);
+ this.Log.LogWarning("Dropping expired ping message {Message}", msg);
}
return false;
@@ -286,7 +287,11 @@ public void FailMessage(Message msg, string reason)
if (this.Log.IsEnabled(LogLevel.Debug)) this.Log.LogDebug((int)ErrorCode.MessagingSendingRejection, "Silo {SiloAddress} is rejecting message: {Message}. Reason = {Reason}", this.LocalSiloAddress, msg, reason);
// Done retrying, send back an error instead
- this.messageCenter.SendRejection(msg, Message.RejectionTypes.Transient, $"Silo {this.LocalSiloAddress} is rejecting message: {msg}. Reason = {reason}");
+ this.messageCenter.SendRejection(
+ msg,
+ Message.RejectionTypes.Transient,
+ $"Silo {this.LocalSiloAddress} is rejecting message: {msg}. Reason = {reason}",
+ new SiloUnavailableException());
}
else
{
diff --git a/src/Orleans.Runtime/Scheduler/ClosureWorkItem.cs b/src/Orleans.Runtime/Scheduler/ClosureWorkItem.cs
index 070461f845..4ed505332f 100644
--- a/src/Orleans.Runtime/Scheduler/ClosureWorkItem.cs
+++ b/src/Orleans.Runtime/Scheduler/ClosureWorkItem.cs
@@ -82,4 +82,28 @@ public override async void Execute()
public override IGrainContext GrainContext { get; }
}
+
+    /// <summary>
+    /// A work item which invokes a synchronous closure with a state argument and exposes a task which completes once the closure has run.
+    /// </summary>
+    /// <typeparam name="TState">The type of the state passed to the closure.</typeparam>
+    internal sealed class ClosureWorkItem<TState>(Action<TState> closure, TState state, string name, IGrainContext grainContext) : WorkItemBase
+    {
+        // Continuations run asynchronously so that code awaiting Task does not execute inline within Execute().
+        private readonly TaskCompletionSource<bool> _completion = new(TaskCreationOptions.RunContinuationsAsynchronously);
+
+        /// <summary>Gets the supplied name, or a name derived from the closure when none was supplied.</summary>
+        public override string Name => name ?? AsyncClosureWorkItem.GetMethodName(closure);
+
+        /// <summary>Gets a task which completes when the closure returns, or faults with the exception the closure threw.</summary>
+        public Task Task => _completion.Task;
+
+        /// <summary>Clears the request context, invokes the closure, and records the outcome on <see cref="Task"/>.</summary>
+        public override void Execute()
+        {
+            try
+            {
+                RequestContext.Clear();
+                closure(state);
+                _completion.TrySetResult(true);
+            }
+            catch (Exception exception)
+            {
+                // The failure is surfaced through Task rather than being thrown to the caller of Execute().
+                _completion.TrySetException(exception);
+            }
+        }
+
+        /// <summary>Gets the grain context this work item was created for.</summary>
+        public override IGrainContext GrainContext { get; } = grainContext;
+    }
}
diff --git a/src/Orleans.Runtime/Scheduler/SchedulerExtensions.cs b/src/Orleans.Runtime/Scheduler/SchedulerExtensions.cs
index 1d3a705334..ee3cc7510e 100644
--- a/src/Orleans.Runtime/Scheduler/SchedulerExtensions.cs
+++ b/src/Orleans.Runtime/Scheduler/SchedulerExtensions.cs
@@ -1,3 +1,4 @@
+#nullable enable
using System;
using System.Threading.Tasks;
@@ -19,6 +20,13 @@ internal static Task QueueTask(this WorkItemGroup scheduler, Func taskFunc
return workItem.Task;
}
+        /// <summary>
+        /// Wraps <paramref name="action"/> and <paramref name="state"/> in a work item, queues it on the scheduler of <paramref name="targetContext"/>,
+        /// and returns the work item's task.
+        /// </summary>
+        /// <typeparam name="TState">The type of the state passed to <paramref name="action"/>.</typeparam>
+        /// <param name="targetContext">The grain context whose scheduler receives the work item.</param>
+        /// <param name="action">The delegate to invoke.</param>
+        /// <param name="state">The argument passed to <paramref name="action"/>.</param>
+        /// <param name="name">An optional name for the work item.</param>
+        /// <returns>The task exposed by the queued work item.</returns>
+        internal static Task QueueAction<TState>(this IGrainContext targetContext, Action<TState> action, TState state, string? name = null)
+        {
+            var workItem = new ClosureWorkItem<TState>(action, state, name, targetContext);
+            targetContext.Scheduler.QueueWorkItem(workItem);
+            return workItem.Task;
+        }
+
internal static Task RunOrQueueTask(this IGrainContext targetContext, Func taskFunc)
{
var currentContext = RuntimeContext.Current;
diff --git a/src/Orleans.Runtime/Scheduler/WorkItemGroup.cs b/src/Orleans.Runtime/Scheduler/WorkItemGroup.cs
index cc24ad28c8..9eca80c08a 100644
--- a/src/Orleans.Runtime/Scheduler/WorkItemGroup.cs
+++ b/src/Orleans.Runtime/Scheduler/WorkItemGroup.cs
@@ -10,11 +10,10 @@
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Orleans.Configuration;
-using Orleans.Internal;
namespace Orleans.Runtime.Scheduler;
-[DebuggerDisplay("WorkItemGroup Name={Name} State={state}")]
+[DebuggerDisplay("WorkItemGroup Context={GrainContext} State={state}")]
internal sealed class WorkItemGroup : IThreadPoolWorkItem, IWorkItemScheduler
{
private enum WorkGroupStatus : byte
@@ -263,7 +262,7 @@ private void LogLongRunningTurn(Task task, long taskDurationMs)
_log.LogWarning(
(int)ErrorCode.SchedulerTurnTooLong3,
"Task {Task} in WorkGroup {GrainContext} took elapsed time {Duration} for execution, which is longer than {TurnWarningLengthThreshold}. Running on thread {Thread}",
- task,
+ task.AsyncState ?? task,
GrainContext.ToString(),
taskDuration.ToString("g"),
_schedulingOptions.TurnWarningLengthThreshold,
diff --git a/src/Orleans.Runtime/Silo/SiloControl.cs b/src/Orleans.Runtime/Silo/SiloControl.cs
index 5bf9c2b73e..c8415c8e43 100644
--- a/src/Orleans.Runtime/Silo/SiloControl.cs
+++ b/src/Orleans.Runtime/Silo/SiloControl.cs
@@ -2,11 +2,13 @@
using System;
using System.Collections.Generic;
using System.Linq;
+using System.Threading;
using System.Threading.Tasks;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Orleans.Configuration;
+using Orleans.GrainDirectory;
using Orleans.Metadata;
using Orleans.Providers;
using Orleans.Runtime.GrainDirectory;
@@ -47,7 +49,7 @@ public SiloControl(
ILocalSiloDetails localSiloDetails,
DeploymentLoadPublisher deploymentLoadPublisher,
Catalog catalog,
- CachedVersionSelectorManager cachedVersionSelectorManager,
+ CachedVersionSelectorManager cachedVersionSelectorManager,
CompatibilityDirectorManager compatibilityDirectorManager,
VersionSelectorManager selectorManager,
IServiceProvider services,
@@ -97,7 +99,7 @@ public Task ForceGarbageCollection()
public Task ForceActivationCollection(TimeSpan ageLimit)
{
logger.LogInformation("ForceActivationCollection");
- return _activationCollector.CollectActivations(ageLimit);
+ return _activationCollector.CollectActivations(ageLimit, CancellationToken.None);
}
public Task ForceRuntimeStatisticsCollection()
@@ -185,7 +187,7 @@ public Task GetSimpleGrainStatistics()
new SimpleGrainStatistic { SiloAddress = this.localSiloDetails.SiloAddress, GrainType = p.Key, ActivationCount = (int)p.Value }).ToArray());
}
- public Task GetDetailedGrainReport(GrainId grainId)
+ public async Task GetDetailedGrainReport(GrainId grainId)
{
logger.LogInformation("DetailedGrainReport for grain id {GrainId}", grainId);
string? grainClassName;
@@ -205,19 +207,39 @@ public Task GetDetailedGrainReport(GrainId grainId)
var a => a?.ToString()
};
- var directory = services.GetRequiredService();
+ var resolver = services.GetRequiredService();
+ var defaultDirectory = services.GetService();
+ var dir = resolver.Resolve(grainId.Type) ?? defaultDirectory;
+ GrainAddress? localCacheActivationAddress = null;
+ GrainAddress? localDirectoryActivationAddress = null;
+ SiloAddress? primaryForGrain = null;
+ if (dir is DistributedGrainDirectory distributedGrainDirectory)
+ {
+ var grainLocator = services.GetRequiredService();
+ grainLocator.TryLookupInCache(grainId, out localCacheActivationAddress);
+ localDirectoryActivationAddress = await ((DistributedGrainDirectory.ITestHooks)distributedGrainDirectory).GetLocalRecord(grainId);
+ primaryForGrain = ((DistributedGrainDirectory.ITestHooks)distributedGrainDirectory).GetPrimaryForGrain(grainId);
+ }
+ else if (dir is null && services.GetService() is { } localGrainDirectory)
+ {
+ localCacheActivationAddress = localGrainDirectory.GetLocalCacheData(grainId);
+ localDirectoryActivationAddress = localGrainDirectory.GetLocalDirectoryData(grainId).Address;
+ primaryForGrain = localGrainDirectory.GetPrimaryForGrain(grainId);
+ }
+
var report = new DetailedGrainReport()
{
Grain = grainId,
SiloAddress = localSiloDetails.SiloAddress,
SiloName = localSiloDetails.Name,
- LocalCacheActivationAddress = directory.GetLocalCacheData(grainId),
- LocalDirectoryActivationAddress = directory.GetLocalDirectoryData(grainId).Address,
- PrimaryForGrain = directory.GetPrimaryForGrain(grainId),
+ LocalCacheActivationAddress = localCacheActivationAddress,
+ LocalDirectoryActivationAddress = localDirectoryActivationAddress,
+ PrimaryForGrain = primaryForGrain,
GrainClassTypeName = grainClassName,
LocalActivation = activation,
};
- return Task.FromResult(report);
+
+ return report;
}
public Task GetActivationCount()
diff --git a/src/Orleans.Runtime/Utilities/SearchAlgorithms.cs b/src/Orleans.Runtime/Utilities/SearchAlgorithms.cs
new file mode 100644
index 0000000000..3bf7c690b6
--- /dev/null
+++ b/src/Orleans.Runtime/Utilities/SearchAlgorithms.cs
@@ -0,0 +1,94 @@
+using System;
+using System.Diagnostics;
+using System.Runtime.CompilerServices;
+
+namespace Orleans.Runtime.Utilities;
+
+/// <summary>
+/// Binary search helpers which operate on an index range and caller-supplied accessors rather than on a concrete collection type.
+/// </summary>
+internal static class SearchAlgorithms
+{
+    /// <summary>
+    /// Performs a binary search over the indices <c>0</c> to <c>length - 1</c> using a caller-supplied comparison.
+    /// </summary>
+    /// <typeparam name="TState">The type of the state forwarded to <paramref name="comparer"/>.</typeparam>
+    /// <param name="length">The number of indices to search.</param>
+    /// <param name="state">State forwarded unchanged to every invocation of <paramref name="comparer"/>.</param>
+    /// <param name="comparer">
+    /// Given a candidate index and <paramref name="state"/>, returns zero when the candidate matches, a negative value when the
+    /// search should continue at higher indices, and a positive value when it should continue at lower indices.
+    /// </param>
+    /// <returns>The matching index, or <c>-1</c> when no index matches.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static int BinarySearch<TState>(int length, TState state, Func<int, TState, int> comparer)
+    {
+        var left = 0;
+        var right = length - 1;
+
+        while (left <= right)
+        {
+            // Written this way rather than (left + right) / 2 to avoid overflowing for large bounds.
+            var mid = left + (right - left) / 2;
+            var comparison = comparer(mid, state);
+
+            if (comparison == 0)
+            {
+                return mid;
+            }
+            else if (comparison < 0)
+            {
+                left = mid + 1;
+            }
+            else
+            {
+                right = mid - 1;
+            }
+        }
+
+        return -1;
+    }
+
+    // Binary search for collections of ranges along a ring (eg, a consistent hash ring), sorted by the starting point of each range.
+    // This differs from a standard binary search in that the search can wrap around from the start to the last element in the collection.
+    // This is accommodated by checking the last element in the collection before returning a negative result, to handle the case where a
+    // range wraps around from end to start. See RingRange.
+    /// <summary>
+    /// Searches the entries of <paramref name="collection"/> for one which compares equal to <paramref name="key"/>.
+    /// </summary>
+    /// <typeparam name="TCollection">The type of the collection handed to <paramref name="getEntry"/>.</typeparam>
+    /// <typeparam name="TElement">The entry type, comparable against <typeparamref name="TKey"/>.</typeparam>
+    /// <typeparam name="TKey">The type of the key being searched for.</typeparam>
+    /// <param name="length">The number of entries in <paramref name="collection"/>.</param>
+    /// <param name="collection">The collection, passed through to <paramref name="getEntry"/>.</param>
+    /// <param name="getEntry">Returns the entry of <paramref name="collection"/> at a given index.</param>
+    /// <param name="key">The key to search for.</param>
+    /// <returns>The index of an entry whose <c>CompareTo(key)</c> is zero, or <c>-1</c> when there is none.</returns>
+    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+    public static int RingRangeBinarySearch<TCollection, TElement, TKey>(
+        int length,
+        TCollection collection,
+        Func<TCollection, int, TElement> getEntry,
+        TKey key) where TElement : IComparable<TKey>
+    {
+        if (length == 0) return -1;
+
+        var left = 0;
+        var right = length - 1;
+
+        TElement entry;
+        while (left <= right)
+        {
+            var mid = left + (right - left) / 2;
+            entry = getEntry(collection, mid);
+            var comparison = entry.CompareTo(key);
+
+            if (comparison == 0)
+            {
+                return mid;
+            }
+            else if (comparison < 0)
+            {
+                // Go right.
+                left = mid + 1;
+            }
+            else
+            {
+                // Go left.
+                right = mid - 1;
+            }
+        }
+
+        // Try the last element.
+        entry = getEntry(collection, length - 1);
+        if (entry.CompareTo(key) == 0)
+        {
+            return length - 1;
+        }
+
+#if DEBUG
+        // Try the first element.
+        entry = getEntry(collection, 0);
+        if (entry.CompareTo(key) == 0)
+        {
+            Debug.Fail("Sort order invariant violated.");
+        }
+#endif
+
+        return -1;
+    }
+}
diff --git a/src/Orleans.Runtime/Utilities/StripedMpscBuffer.cs b/src/Orleans.Runtime/Utilities/StripedMpscBuffer.cs
index 8b0eb9d95d..57420c90d6 100644
--- a/src/Orleans.Runtime/Utilities/StripedMpscBuffer.cs
+++ b/src/Orleans.Runtime/Utilities/StripedMpscBuffer.cs
@@ -425,4 +425,3 @@ internal class Padding
internal const int CACHE_LINE_SIZE = 64;
#endif
}
-
diff --git a/src/Orleans.TestingHost/ConfigureDistributedGrainDirectory.cs b/src/Orleans.TestingHost/ConfigureDistributedGrainDirectory.cs
new file mode 100644
index 0000000000..f2beaaa0b5
--- /dev/null
+++ b/src/Orleans.TestingHost/ConfigureDistributedGrainDirectory.cs
@@ -0,0 +1,10 @@
+using Orleans.Hosting;
+
+namespace Orleans.TestingHost;
+
+/// <summary>
+/// Silo configurator which calls <c>AddDistributedGrainDirectory()</c> on the silo builder it is given.
+/// </summary>
+internal class ConfigureDistributedGrainDirectory : ISiloConfigurator
+{
+    public void Configure(ISiloBuilder siloBuilder)
+    {
+#pragma warning disable ORLEANSEXP002 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+        siloBuilder.AddDistributedGrainDirectory();
+#pragma warning restore ORLEANSEXP002 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+    }
+}
\ No newline at end of file
diff --git a/src/Orleans.TestingHost/InProcess/InProcessMembershipTable.cs b/src/Orleans.TestingHost/InProcess/InProcessMembershipTable.cs
index bfefa09531..b89cafd241 100644
--- a/src/Orleans.TestingHost/InProcess/InProcessMembershipTable.cs
+++ b/src/Orleans.TestingHost/InProcess/InProcessMembershipTable.cs
@@ -11,7 +11,7 @@ namespace Orleans.TestingHost.InProcess;
///
/// An in-memory implementation of for testing purposes.
///
-internal sealed class InProcessMembershipTable(string clusterId) : IMembershipTableSystemTarget, IGatewayListProvider
+internal sealed class InProcessMembershipTable(string clusterId) : IMembershipTable, IGatewayListProvider
{
private readonly Table _table = new();
private readonly string _clusterId = clusterId;
diff --git a/src/Orleans.TestingHost/TestCluster.cs b/src/Orleans.TestingHost/TestCluster.cs
index b67021df35..7d0a59084b 100644
--- a/src/Orleans.TestingHost/TestCluster.cs
+++ b/src/Orleans.TestingHost/TestCluster.cs
@@ -776,20 +776,20 @@ public async ValueTask DisposeAsync()
await Task.Run(async () =>
{
- foreach (var handle in this.SecondarySilos)
+ foreach (var handle in SecondarySilos)
{
- await DisposeAsync(handle).ConfigureAwait(false);
+ await DisposeAsync(handle).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
}
- if (this.Primary is not null)
+ if (Primary is not null)
{
- await DisposeAsync(Primary).ConfigureAwait(false);
+ await DisposeAsync(Primary).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
}
- await DisposeAsync(ClientHost).ConfigureAwait(false);
+ await DisposeAsync(ClientHost).ConfigureAwait(ConfigureAwaitOptions.SuppressThrowing);
ClientHost = null;
- this.PortAllocator?.Dispose();
+ PortAllocator?.Dispose();
});
_disposed = true;
diff --git a/src/Orleans.TestingHost/TestClusterBuilder.cs b/src/Orleans.TestingHost/TestClusterBuilder.cs
index 0a55fb3030..439eafcd63 100644
--- a/src/Orleans.TestingHost/TestClusterBuilder.cs
+++ b/src/Orleans.TestingHost/TestClusterBuilder.cs
@@ -44,6 +44,7 @@ public TestClusterBuilder(short initialSilosCount)
AssumeHomogenousSilosForTesting = true
};
+ AddSiloBuilderConfigurator();
this.AddSiloBuilderConfigurator();
this.ConfigureBuilder(ConfigureDefaultPorts);
}
diff --git a/src/Orleans.TestingHost/TestClusterHostFactory.cs b/src/Orleans.TestingHost/TestClusterHostFactory.cs
index 10ee2d8c99..5e87c5b081 100644
--- a/src/Orleans.TestingHost/TestClusterHostFactory.cs
+++ b/src/Orleans.TestingHost/TestClusterHostFactory.cs
@@ -179,7 +179,7 @@ private static void TryConfigureFileLogging(IConfiguration configuration, IServi
bool.TryParse(configuration[nameof(TestClusterOptions.ConfigureFileLogging)], out bool configureFileLogging);
if (configureFileLogging)
{
- var fileName = TestingUtils.CreateTraceFileName(name, configuration[nameof(TestClusterOptions.ClusterId)]);
+ var fileName = TestingUtils.CreateTraceFileName(name, configuration["Orleans:ClusterId"]);
services.AddLogging(loggingBuilder => loggingBuilder.AddFile(fileName));
}
}
diff --git a/test/DefaultCluster.Tests/ObserverTests.cs b/test/DefaultCluster.Tests/ObserverTests.cs
index efb719dc8f..4863cffc1e 100644
--- a/test/DefaultCluster.Tests/ObserverTests.cs
+++ b/test/DefaultCluster.Tests/ObserverTests.cs
@@ -18,7 +18,7 @@ public class ObserverTests : HostedTestClusterEnsureDefaultStarted
private readonly bool[] callbacksReceived = new bool[2];
// we keep the observer objects as instance variables to prevent them from
- // being garbage collected permaturely (the runtime stores them as weak references).
+ // being garbage collected prematurely (the runtime stores them as weak references).
private SimpleGrainObserver observer1;
private SimpleGrainObserver observer2;
diff --git a/test/Directory.Build.props b/test/Directory.Build.props
index 58489d6e57..c2b7dedb73 100644
--- a/test/Directory.Build.props
+++ b/test/Directory.Build.props
@@ -23,4 +23,8 @@
+
+
+
+
diff --git a/test/Extensions/Tester.Redis/GrainDirectory/RedisGrainDirectoryTests.cs b/test/Extensions/Tester.Redis/GrainDirectory/RedisGrainDirectoryTests.cs
index b65ee7fe92..f81b77479c 100644
--- a/test/Extensions/Tester.Redis/GrainDirectory/RedisGrainDirectoryTests.cs
+++ b/test/Extensions/Tester.Redis/GrainDirectory/RedisGrainDirectoryTests.cs
@@ -18,7 +18,7 @@ public RedisGrainDirectoryTests(ITestOutputHelper testOutput) : base(testOutput)
{
}
- protected override RedisGrainDirectory GetGrainDirectory()
+ protected override RedisGrainDirectory CreateGrainDirectory()
{
TestUtils.CheckForRedis();
var configuration = TestDefaultConfiguration.RedisConnectionString;
diff --git a/test/Extensions/TesterAzureUtils/AzureGrainDirectoryTests.cs b/test/Extensions/TesterAzureUtils/AzureGrainDirectoryTests.cs
index 6fa27aef0d..1b966da485 100644
--- a/test/Extensions/TesterAzureUtils/AzureGrainDirectoryTests.cs
+++ b/test/Extensions/TesterAzureUtils/AzureGrainDirectoryTests.cs
@@ -1,7 +1,7 @@
+#nullable enable
using Microsoft.Extensions.Options;
using Orleans.Configuration;
using Orleans.GrainDirectory.AzureStorage;
-using Orleans.Runtime;
using Orleans.TestingHost.Utils;
using Tester.Directories;
using Xunit;
@@ -9,14 +9,10 @@
namespace Tester.AzureUtils
{
- [TestCategory("AzureStorage"), TestCategory("Storage")]
- public class AzureTableGrainDirectoryTests : GrainDirectoryTests
+ [TestCategory("AzureStorage"), TestCategory("Directory")]
+ public class AzureTableGrainDirectoryTests(ITestOutputHelper testOutput) : GrainDirectoryTests(testOutput)
{
- public AzureTableGrainDirectoryTests(ITestOutputHelper testOutput) : base(testOutput)
- {
- }
-
- protected override AzureTableGrainDirectory GetGrainDirectory()
+ protected override AzureTableGrainDirectory CreateGrainDirectory()
{
TestUtils.CheckForAzureStorage();
StorageEmulatorUtilities.EnsureEmulatorIsNotUsed();
@@ -56,7 +52,7 @@ public async Task UnregisterMany()
MembershipVersion = new MembershipVersion(51)
};
addresses.Add(addr);
- await this.grainDirectory.Register(addr, previousAddress: null);
+ await GrainDirectory.Register(addr, previousAddress: null);
}
// Modify the Rth entry locally, to simulate another activation tentative by another silo
@@ -71,20 +67,20 @@ public async Task UnregisterMany()
};
// Batch unregister
- await this.grainDirectory.UnregisterMany(addresses);
+ await GrainDirectory.UnregisterMany(addresses);
// Now we should only find the old Rth entry
for (int i = 0; i < N; i++)
{
if (i == R)
{
- var addr = await this.grainDirectory.Lookup(addresses[i].GrainId);
+ var addr = await GrainDirectory.Lookup(addresses[i].GrainId);
Assert.NotNull(addr);
Assert.Equal(oldActivation, addr.ActivationId);
}
else
{
- Assert.Null(await this.grainDirectory.Lookup(addresses[i].GrainId));
+ Assert.Null(await GrainDirectory.Lookup(addresses[i].GrainId));
}
}
}
diff --git a/test/Grains/TestInternalGrains/TimerGrain.cs b/test/Grains/TestInternalGrains/TimerGrain.cs
index 0e6540407f..8d2c2f3f94 100644
--- a/test/Grains/TestInternalGrains/TimerGrain.cs
+++ b/test/Grains/TestInternalGrains/TimerGrain.cs
@@ -221,7 +221,7 @@ public async Task RunSelfDisposingTimer()
timer[0].Dispose();
Assert.True(ct.IsCancellationRequested);
await Task.Delay(100);
- tcs.SetResult();
+ tcs.TrySetResult();
}
catch (Exception ex)
{
@@ -538,7 +538,7 @@ public Task StartStuckTimer(TimeSpan dueTime)
private Task TimerTick()
{
- this.completionSource.SetResult(1);
+ this.completionSource.TrySetResult(1);
return Task.CompletedTask;
}
@@ -556,7 +556,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(() =>
{
- tasks[0].SetResult(("NONE", CancellationToken.None));
+ tasks[0].TrySetResult(("NONE", CancellationToken.None));
return Task.CompletedTask;
}, new GrainTimerCreationOptions(TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)) { Interleave = true }));
@@ -564,7 +564,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(() =>
{
- tasks[1].SetResult(("NONE", CancellationToken.None));
+ tasks[1].TrySetResult(("NONE", CancellationToken.None));
return Task.CompletedTask;
}, TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)));
@@ -572,7 +572,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(state =>
{
- tasks[2].SetResult((state, CancellationToken.None));
+ tasks[2].TrySetResult((state, CancellationToken.None));
return Task.CompletedTask;
},
"STATE",
@@ -582,7 +582,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(state =>
{
- tasks[3].SetResult((state, CancellationToken.None));
+ tasks[3].TrySetResult((state, CancellationToken.None));
return Task.CompletedTask;
},
"STATE",
@@ -593,7 +593,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(ct =>
{
- tasks[4].SetResult(("NONE", ct));
+ tasks[4].TrySetResult(("NONE", ct));
return Task.CompletedTask;
}, new GrainTimerCreationOptions(TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)) { Interleave = true }));
@@ -601,7 +601,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(ct =>
{
- tasks[5].SetResult(("NONE", ct));
+ tasks[5].TrySetResult(("NONE", ct));
return Task.CompletedTask;
}, TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)));
@@ -609,7 +609,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer((state, ct) =>
{
- tasks[6].SetResult((state, ct));
+ tasks[6].TrySetResult((state, ct));
return Task.CompletedTask;
},
"STATE",
@@ -619,7 +619,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer((state, ct) =>
{
- tasks[7].SetResult((state, ct));
+ tasks[7].TrySetResult((state, ct));
return Task.CompletedTask;
},
"STATE",
@@ -878,7 +878,7 @@ public async Task RunSelfDisposingTimer()
timer[0].Dispose();
Assert.True(ct.IsCancellationRequested);
await Task.Delay(100);
- tcs.SetResult();
+ tcs.TrySetResult();
}
catch (Exception ex)
{
@@ -1042,7 +1042,7 @@ public Task StartStuckTimer(TimeSpan dueTime)
private Task TimerTick()
{
- this.completionSource.SetResult(1);
+ this.completionSource.TrySetResult(1);
return Task.CompletedTask;
}
@@ -1060,7 +1060,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(() =>
{
- tasks[0].SetResult(("NONE", CancellationToken.None));
+ tasks[0].TrySetResult(("NONE", CancellationToken.None));
return Task.CompletedTask;
}, new GrainTimerCreationOptions(TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)) { Interleave = true }));
@@ -1068,7 +1068,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(() =>
{
- tasks[1].SetResult(("NONE", CancellationToken.None));
+ tasks[1].TrySetResult(("NONE", CancellationToken.None));
return Task.CompletedTask;
}, TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)));
@@ -1076,7 +1076,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(state =>
{
- tasks[2].SetResult((state, CancellationToken.None));
+ tasks[2].TrySetResult((state, CancellationToken.None));
return Task.CompletedTask;
},
"STATE",
@@ -1086,7 +1086,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(state =>
{
- tasks[3].SetResult((state, CancellationToken.None));
+ tasks[3].TrySetResult((state, CancellationToken.None));
return Task.CompletedTask;
},
"STATE",
@@ -1097,7 +1097,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(ct =>
{
- tasks[4].SetResult(("NONE", ct));
+ tasks[4].TrySetResult(("NONE", ct));
return Task.CompletedTask;
}, new GrainTimerCreationOptions(TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)) { Interleave = true }));
@@ -1105,7 +1105,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer(ct =>
{
- tasks[5].SetResult(("NONE", ct));
+ tasks[5].TrySetResult(("NONE", ct));
return Task.CompletedTask;
}, TimeSpan.FromMilliseconds(25), TimeSpan.FromSeconds(10)));
@@ -1113,7 +1113,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer((state, ct) =>
{
- tasks[6].SetResult((state, ct));
+ tasks[6].TrySetResult((state, ct));
return Task.CompletedTask;
},
"STATE",
@@ -1123,7 +1123,7 @@ public Task TestAllTimerOverloads()
tasks.Add(new());
timers.Add(this.RegisterGrainTimer((state, ct) =>
{
- tasks[7].SetResult((state, ct));
+ tasks[7].TrySetResult((state, ct));
return Task.CompletedTask;
},
"STATE",
@@ -1206,7 +1206,7 @@ public async Task RunSelfDisposingTimer()
{
Assert.NotNull(timer[0]);
timer[0].Dispose();
- tcs.SetResult();
+ tcs.TrySetResult();
await Task.Delay(100);
}
catch (Exception ex)
diff --git a/test/NonSilo.Tests/Directory/DirectoryMembershipSnapshotTests.cs b/test/NonSilo.Tests/Directory/DirectoryMembershipSnapshotTests.cs
new file mode 100644
index 0000000000..40264e5b5a
--- /dev/null
+++ b/test/NonSilo.Tests/Directory/DirectoryMembershipSnapshotTests.cs
@@ -0,0 +1,123 @@
+using System.Collections.Immutable;
+using Orleans.Runtime.GrainDirectory;
+using CsCheck;
+using Xunit;
+using Orleans.Configuration;
+
+namespace NonSilo.Tests.Directory;
+
+[TestCategory("BVT")]
+public sealed class DirectoryMembershipSnapshotTests
+{
+    // Generates cluster membership snapshots of 1 to 30 members, each with an arbitrary hash value and status.
+    private static readonly Gen<ClusterMembershipSnapshot> GenClusterMembershipSnapshot = Gen.Select(Gen.UInt, Gen.Enum<SiloStatus>(), (hash, status) => (hash, status))
+        .Array[Gen.Int[1, 30]].Select((tuple) =>
+        {
+            var dict = ImmutableDictionary.CreateBuilder<SiloAddress, ClusterMember>();
+            var port = 1;
+            foreach (var (hash, status) in tuple)
+            {
+                // The port is incremented per entry, so every entry gets a different endpoint even when hashes repeat.
+                var addr = SiloAddress.New(new System.Net.IPEndPoint(System.Net.IPAddress.Loopback, port++), (int)hash);
+                dict.Add(addr, new ClusterMember(addr, status, $"Silo_{hash}"));
+            }
+            return new ClusterMembershipSnapshot(dict.ToImmutable(), new(1));
+        });
+
+    private static readonly Gen<DirectoryMembershipSnapshot> GenDirectoryMembershipSnapshot =
+        GenClusterMembershipSnapshot.SelectMany(snapshot => Gen.UInt.Array[ConsistentRingOptions.DEFAULT_NUM_VIRTUAL_RING_BUCKETS].Array[snapshot.Members.Count].Select(hashes =>
+        {
+            var i = 0; // Each invocation of the callback below hands out the next pre-generated hash array.
+            return new DirectoryMembershipSnapshot(snapshot, null!, (_, _) => hashes[i++]);
+        }));
+
+    [Fact]
+    public void GetOwnerTest()
+    {
+        // TryGetOwner must succeed exactly when the snapshot has at least one member.
+        var samples = Gen.Select(GenDirectoryMembershipSnapshot, Gen.UInt);
+        samples.Sample((snapshot, hash) => Assert.Equal(snapshot.Members.Length > 0, snapshot.TryGetOwner(hash, out _, out _)));
+    }
+
+    [Fact]
+    public void MembersDoNotIntersectTest()
+    {
+        // No two distinct entries of RangeOwners may hold overlapping ranges.
+        var nonEmptySnapshots = GenDirectoryMembershipSnapshot.Where(s => s.Members.Length > 0);
+        nonEmptySnapshots.Sample(snapshot =>
+        {
+            foreach (var range in snapshot.RangeOwners)
+            {
+                foreach (var otherRange in snapshot.RangeOwners)
+                {
+                    // An entry is never checked against itself, only against the other entries.
+                    var isSameEntry = range == otherRange;
+                    if (!isSameEntry)
+                    {
+                        Assert.False(range.Range.Intersects(otherRange.Range));
+                    }
+                }
+            }
+        });
+    }
+
+    [Fact]
+    public void ViewCoversRingTest()
+    {
+        // The union of all member ranges should cover the entire ring.
+        GenDirectoryMembershipSnapshot.Where(s => s.Members.Length > 0)
+            .Sample(snapshot =>
+            {
+                uint sum = 0;
+                var allRanges = new List<RingRange>();
+                foreach (var member in snapshot.Members)
+                {
+                    // GetMemberRanges and GetMemberRangesByPartition must report the same total size for a member.
+                    Assert.Equal(snapshot.GetMemberRanges(member).Sum(range => range.Size), snapshot.GetMemberRangesByPartition(member).Sum(range => range.Size));
+                    foreach (var range in snapshot.GetMemberRanges(member))
+                    {
+                        allRanges.Add(range);
+                        sum += range.Size;
+                    }
+                }
+
+                // The sizes of every member's ranges must add up to uint.MaxValue.
+                Assert.Equal(uint.MaxValue, sum);
+
+                var allRangesCollection = RingRangeCollection.Create(allRanges);
+                Assert.Equal(uint.MaxValue, allRangesCollection.Size);
+                Assert.Equal(100f, allRangesCollection.SizePercent);
+                Assert.False(allRangesCollection.IsEmpty);
+                Assert.False(allRangesCollection.IsDefault);
+                Assert.True(allRangesCollection.IsFull);
+            });
+    }
+
+    [Fact]
+    public void MemberRangesCoverRingTest()
+    {
+        // The ranges returned by GetMemberRangesByPartition for all members, taken together, should cover the entire ring.
+        GenDirectoryMembershipSnapshot.Where(s => s.Members.Length > 0)
+            .Sample(snapshot =>
+            {
+                uint sum = 0;
+                var allRanges = new List<RingRange>();
+                foreach (var member in snapshot.Members)
+                {
+                    foreach (var range in snapshot.GetMemberRangesByPartition(member))
+                    {
+                        allRanges.Add(range);
+                        sum += range.Size;
+                    }
+                }
+
+                Assert.Equal(uint.MaxValue, sum);
+                var allRangesCollection = RingRangeCollection.Create(allRanges);
+                Assert.Equal(uint.MaxValue, allRangesCollection.Size);
+                Assert.Equal(100f, allRangesCollection.SizePercent);
+                Assert.False(allRangesCollection.IsEmpty);
+                Assert.False(allRangesCollection.IsDefault);
+                Assert.True(allRangesCollection.IsFull);
+            });
+    }
+}
diff --git a/test/NonSilo.Tests/Directory/RingRangeCollectionTests.cs b/test/NonSilo.Tests/Directory/RingRangeCollectionTests.cs
new file mode 100644
index 0000000000..22041191a9
--- /dev/null
+++ b/test/NonSilo.Tests/Directory/RingRangeCollectionTests.cs
@@ -0,0 +1,142 @@
+using System.Collections.Immutable;
+using Orleans.Runtime.GrainDirectory;
+using CsCheck;
+using Xunit;
+
+namespace NonSilo.Tests.Directory;
+
+[TestCategory("BVT")]
+public sealed class RingRangeCollectionTests
+{
+    // NOTE(review): as written this generator only ever yields an empty collection (see the loop condition), so the property tests using it never see real ranges.
+    private static readonly Gen<RingRangeCollection> GenRingRangeCollection = Gen.Int[0, 100].SelectMany(count => Gen.Select(Gen.UInt, Gen.Bool, static (boundary, included) => (boundary, included)).Array[count].Select(elements =>
+    {
+        var arr = ImmutableArray.CreateBuilder<RingRange>(elements.Length);
+        for (var i = 1; i < arr.Count;) // NOTE(review): arr.Count is 0 for a fresh builder, so the body never runs; the loop also has no i++.
+        {
+            var prev = elements[i - 1];
+            var (boundary, included) = elements[i];
+            if (!included)
+            {
+                continue;
+            }
+            arr.Add(RingRange.Create(prev.boundary, boundary));
+        }
+
+        return RingRangeCollection.Create(arr);
+    }));
+
+    [Fact]
+    public void Contains()
+    {
+        // The collection-level lookup must agree with a scan over the individual ranges:
+        // a point is contained exactly when at least one range contains it.
+        var samples = Gen.Select(GenRingRangeCollection, Gen.UInt);
+        samples.Sample((ranges, point) =>
+            Assert.Equal(ranges.Ranges.Any(r => r.Contains(point)), ranges.Contains(point)));
+    }
+
+    [Fact]
+    public void Intersects()
+    {
+        GenRingRangeCollection.Sample(collection =>
+        {
+            for (var i = 0; i < collection.Ranges.Length; i++)
+            {
+                Assert.True(collection.Intersects(collection.Ranges[i])); // every range must intersect the collection that holds it
+            }
+        });
+    }
+
+    [Fact]
+    public void Difference()
+    {
+        var ringWithUpdates = GenRingRangeCollection.SelectMany(original => Gen.Float[0f, 1f].Array[original.Ranges.Length].Select(diffs =>
+        {
+            // Increase or decrease the end of each range by some amount.
+            var arr = ImmutableArray.CreateBuilder<RingRange>(original.Ranges.Length);
+            for (var i = 0; i < diffs.Length; i++)
+            {
+                var orig = original.Ranges[i];
+                var next = original.Ranges[(i + 1) % original.Ranges.Length];
+                // NOTE(review): Math.Clamp throws when min > max, which can happen whenever next.Start <= orig.Start
+                // (e.g. a single-range collection, where next is orig itself) - revisit once the generator yields ranges.
+                arr.Add(RingRange.Create(orig.Start, (uint)Math.Clamp(orig.End + diffs[i], orig.Start + 1, next.Start)));
+            }
+
+            return (original, RingRangeCollection.Create(arr));
+        }));
+
+        ringWithUpdates.Sample((original, updated) =>
+        {
+            // Additions: covered by the updated collection but not by the original.
+            var additions = updated.Difference(original);
+            foreach (var addition in additions)
+            {
+                Assert.True(updated.Intersects(addition));
+                Assert.False(original.Intersects(addition));
+            }
+
+            // Removals: covered by the original collection but not by the updated one.
+            var removals = original.Difference(updated);
+            foreach (var removal in removals)
+            {
+                Assert.False(updated.Intersects(removal));
+                Assert.True(original.Intersects(removal));
+            }
+        });
+    }
+
+    [Fact]
+    public void ContainsTest()
+    {
+        // A point is either outside the collection or inside exactly one of its ranges.
+        Gen.Select(GenRingRangeCollection, Gen.UInt).Sample((collection, point) =>
+        {
+            var expectedContains = collection.Ranges.ToList().Any(r => r.Contains(point));
+            Assert.Equal(expectedContains, collection.Contains(point));
+            var expectedMatches = expectedContains ? 1 : 0;
+            Assert.Equal(expectedMatches, collection.Count(r => r.Contains(point)));
+        });
+    }
+
+    [Fact]
+    public void ContainsWrappedTest()
+    {
+        var segments = ImmutableArray.Create(new RingRange[]
+        {
+            RingRange.Create(0x10930012, 0x179C5AD4),
+            RingRange.Create(0x287844C7, 0x2B5DCCCB),
+            RingRange.Create(0x32AC80C2, 0x36F72978),
+            RingRange.Create(0x6F5C3AAC, 0x7776E202),
+            RingRange.Create(0x7D2B02F3, 0x7DF52810),
+            RingRange.Create(0xA18205D1, 0xA3A44031),
+            RingRange.Create(0xA847CD39, 0xAD6C28D0),
+            RingRange.Create(0xAF60D42F, 0xB278D2BE),
+            RingRange.Create(0xBB8EA837, 0xC61DA5E1),
+            RingRange.Create(0xF08C2237, 0xF3030A5A)
+        });
+        var collection = new RingRangeCollection(segments);
+        uint probe = 0x16F4037C;
+        Assert.True(segments[0].Contains(probe));
+        Assert.True(collection.Contains(probe));
+
+        // One past the end of the final range: contained by neither that range nor the collection.
+        probe = 0xF3030A5A + 1;
+        Assert.False(segments[^1].Contains(probe));
+        Assert.False(collection.Contains(probe));
+
+        // The end value of the final range itself is still contained.
+        probe = 0xF3030A5A;
+        Assert.True(segments[^1].Contains(probe));
+        Assert.True(collection.Contains(probe));
+
+        // Just before the start of the final range, in the gap after the preceding range.
+        probe = 0xF08C2237 - 1;
+        Assert.False(collection.Contains(probe));
+
+        // One past the start of a range in the middle of the list.
+        probe = 0x7D2B02F3 + 1;
+        Assert.True(collection.Contains(probe));
+    }
+}
diff --git a/test/NonSilo.Tests/Directory/RingRangeTests.cs b/test/NonSilo.Tests/Directory/RingRangeTests.cs
new file mode 100644
index 0000000000..e689b015d4
--- /dev/null
+++ b/test/NonSilo.Tests/Directory/RingRangeTests.cs
@@ -0,0 +1,183 @@
+using Orleans.Runtime.GrainDirectory;
+using CsCheck;
+using Xunit;
+
+namespace NonSilo.Tests.Directory;
+
+[TestCategory("BVT")]
+public sealed class RingRangeTests
+{
+    internal static Gen<RingRange> GenRingRange => Gen.Select(Gen.UInt, Gen.UInt, RingRange.Create); // ranges built from two arbitrary uint endpoints
+
+    [Fact]
+    public void RingRangeDifference_EquallyDividedRange()
+    {
+        var firstHalf = CreateEquallyDividedRange(2, 0);
+        var secondHalf = CreateEquallyDividedRange(2, 1);
+        // A range minus itself leaves nothing.
+        Assert.Empty(firstHalf.Difference(firstHalf));
+
+        // Subtracting the empty range changes nothing; subtracting from the empty range yields nothing.
+        Assert.Equal(firstHalf, Assert.Single(firstHalf.Difference(RingRange.Empty)));
+        Assert.Empty(RingRange.Empty.Difference(firstHalf));
+
+        // Subtracting one half of the ring from the other half leaves that other half intact.
+        Assert.Equal(firstHalf, Assert.Single(firstHalf.Difference(secondHalf)));
+        Assert.Equal(secondHalf, Assert.Single(secondHalf.Difference(firstHalf)));
+    }
+
+    [Fact]
+    public void ComplementDoesNotIntersect()
+    {
+        // Empty and full ranges are filtered out before sampling.
+        var partialRanges = GenRingRange.Where(range => !(range.IsEmpty || range.IsFull));
+        partialRanges.Sample(range =>
+        {
+            var complement = range.Complement();
+            Assert.False(range.Intersects(complement));
+            Assert.Empty(range.Intersections(complement));
+            Assert.False(range.Contains(complement.End));
+            // With no overlap, subtracting either range from the other must leave it unchanged.
+            Assert.Equal(range, Assert.Single(range.Difference(complement)));
+            Assert.Equal(complement, Assert.Single(complement.Difference(range)));
+        });
+    }
+
+    [Fact]
+    public void ComplementComplementIsEqual()
+    {
+        // Taking the complement twice must give back the starting range.
+        GenRingRange.Sample(range =>
+        {
+            var roundTripped = range.Complement().Complement();
+            var isSame = range.Equals(roundTripped);
+            Assert.True(isSame);
+        });
+    }
+
+    [Fact]
+    public void RingRangeDifference_HolePunch()
+    {
+        var first = CreateEquallyDividedRange(8, 0);
+        var second = CreateEquallyDividedRange(8, 1);
+        var third = CreateEquallyDividedRange(8, 2);
+        var fullRange = RingRange.Create(first.Start, third.End);
+        // Removing the middle slice must leave exactly the two outer slices; materialize once instead of re-enumerating.
+        var midPunch = fullRange.Difference(second).ToArray();
+        Assert.Equal(2, midPunch.Length);
+        Assert.Equal(first, midPunch[0]);
+        Assert.Equal(third, midPunch[^1]);
+    }
+
+    [Fact]
+    public void RingRangeDifference_Empty()
+    {
+        // Subtracting the empty range must return the range unchanged, as a single piece.
+        var range = RingRange.Create(0x33333334, 0x66666667);
+        Assert.Equal(range, Assert.Single(range.Difference(RingRange.Empty)));
+    }
+
+    [Fact]
+    public void RingRangeDifference_Empty_Two()
+    {
+        // A range whose start lies above its end and a mid-ring range: each difference must return its receiver unchanged.
+        var midRing = RingRange.Create(0x33333334, 0x66666667);
+        var wrapped = RingRange.Create(uint.MaxValue - 1, 1);
+        Assert.Equal(midRing, Assert.Single(midRing.Difference(wrapped)));
+        Assert.Equal(wrapped, Assert.Single(wrapped.Difference(midRing)));
+    }
+
+    [Fact]
+    public void RingRangeIntersection()
+    {
+        // Despite its name, this test pins down Difference for the empty/full corner cases.
+        var empty = RingRange.Empty;
+        var full = RingRange.Full;
+        Assert.Empty(empty.Difference(empty));
+        Assert.Empty(full.Difference(full));
+        Assert.Equal(full, Assert.Single(full.Difference(empty)));
+        Assert.Empty(empty.Difference(full));
+    }
+
+    [Fact]
+    public void RingRangeContains()
+    {
+        var probes = new uint[] { 0, 1, uint.MaxValue, uint.MaxValue / 2 };
+        foreach (var probe in probes)
+        {
+            Assert.False(RingRange.Empty.Contains(probe)); // the empty range contains none of the probes
+        }
+        foreach (var probe in probes)
+        {
+            Assert.True(RingRange.Full.Contains(probe)); // the full range contains all of the probes
+        }
+
+        var wrapped = RingRange.Create(uint.MaxValue - 10, 10); // start is above end, so the range wraps past zero
+        Assert.True(wrapped.Contains(0));
+        Assert.True(wrapped.Contains(1));
+        Assert.True(wrapped.Contains(uint.MaxValue));
+        Assert.False(wrapped.Contains(uint.MaxValue / 2));
+    }
+
+    [Theory]
+    [InlineData(1)]
+    [InlineData(2)]
+    [InlineData(3)]
+    [InlineData(17)]
+    [InlineData(33)]
+    public void EqualRangeInvariants(int count)
+    {
+        // Each slice must not intersect the slice before it, and the slice sizes must add up to uint.MaxValue.
+        ulong totalSize = 0;
+        var preceding = RingRange.Empty;
+        foreach (var index in Enumerable.Range(0, count))
+        {
+            var slice = CreateEquallyDividedRange(count, index);
+            Assert.False(preceding.Intersects(slice));
+            totalSize += slice.Size;
+            preceding = slice;
+        }
+        Assert.Equal(uint.MaxValue, totalSize);
+    }
+
+    /// <summary>
+    /// Returns slice number <paramref name="index"/> (zero-based) of the ring when it is cut into <paramref name="count"/> near-equal slices.
+    /// </summary>
+    private static RingRange CreateEquallyDividedRange(int count, int index)
+    {
+        // Validate the signed arguments first; Core re-checks after the unsigned casts (a negative index becomes a huge uint).
+        ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(index, count, nameof(index));
+        ArgumentOutOfRangeException.ThrowIfLessThan(count, 1);
+        return Core((uint)count, (uint)index);
+        static RingRange Core(uint count, uint index)
+        {
+            ArgumentOutOfRangeException.ThrowIfGreaterThanOrEqual(index, count, nameof(index));
+            if (count == 1 && index == 0)
+            {
+                return RingRange.Full;
+            }
+
+            // 2^32 points are shared out: every slice gets `portion`, and the first few slices get one extra point each.
+            const ulong RingSize = (ulong)uint.MaxValue + 1;
+            var portion = RingSize / count;
+            var slicesWithExtraPoint = RingSize - portion * count;
+
+            // Walk the slices from zero up to the requested one; each slice is (Start, End].
+            var start = 0u;
+            var end = 0u;
+            for (var i = 0u; i <= index; i++)
+            {
+                start = end;
+                // The unchecked cast lets the end of the final slice wrap back around to zero.
+                end = unchecked((uint)(start + portion));
+                if (i < slicesWithExtraPoint)
+                {
+                    end++;
+                }
+            }
+
+            // index < count was verified above, so the loop always reaches the requested slice.
+            return RingRange.Create(start, end);
+        }
+    }
+}
diff --git a/test/NonSilo.Tests/NonSilo.Tests.csproj b/test/NonSilo.Tests/NonSilo.Tests.csproj
index 1f4a4221a1..3868e1121e 100644
--- a/test/NonSilo.Tests/NonSilo.Tests.csproj
+++ b/test/NonSilo.Tests/NonSilo.Tests.csproj
@@ -17,6 +17,7 @@
+
diff --git a/test/NonSilo.Tests/SchedulerTests/OrleansTaskSchedulerBasicTests.cs b/test/NonSilo.Tests/SchedulerTests/OrleansTaskSchedulerBasicTests.cs
index d388aaa23e..2e38765d6f 100644
--- a/test/NonSilo.Tests/SchedulerTests/OrleansTaskSchedulerBasicTests.cs
+++ b/test/NonSilo.Tests/SchedulerTests/OrleansTaskSchedulerBasicTests.cs
@@ -407,7 +407,7 @@ internal static ILoggerFactory InitSchedulerLogging()
var filters = new LoggerFilterOptions();
filters.AddFilter("Scheduler", LogLevel.Trace);
filters.AddFilter("Scheduler.WorkerPoolThread", LogLevel.Trace);
- var loggerFactory = TestingUtils.CreateDefaultLoggerFactory(TestingUtils.CreateTraceFileName("Silo", DateTime.Now.ToString("yyyyMMdd_hhmmss")), filters);
+ var loggerFactory = TestingUtils.CreateDefaultLoggerFactory(TestingUtils.CreateTraceFileName("Silo", DateTime.UtcNow.ToString("yyyyMMdd_hhmmss")), filters);
return loggerFactory;
}
}
diff --git a/test/Orleans.Serialization.FSharp.Tests/Orleans.Serialization.FSharp.Tests.fsproj b/test/Orleans.Serialization.FSharp.Tests/Orleans.Serialization.FSharp.Tests.fsproj
index 3c52da56d9..adfb7b0328 100644
--- a/test/Orleans.Serialization.FSharp.Tests/Orleans.Serialization.FSharp.Tests.fsproj
+++ b/test/Orleans.Serialization.FSharp.Tests/Orleans.Serialization.FSharp.Tests.fsproj
@@ -1,4 +1,4 @@
-
+
latest
$(TestTargetFrameworks)
@@ -7,6 +7,8 @@
+
+
diff --git a/test/Tester/Directories/GrainDirectoryTests.cs b/test/Tester/Directories/GrainDirectoryTests.cs
index 2f4486d002..d95f5bd6ca 100644
--- a/test/Tester/Directories/GrainDirectoryTests.cs
+++ b/test/Tester/Directories/GrainDirectoryTests.cs
@@ -1,151 +1,151 @@
+#nullable enable
using Microsoft.Extensions.Logging;
using Orleans.GrainDirectory;
-using Orleans.Runtime;
using TestExtensions;
using Xunit;
using Xunit.Abstractions;
-namespace Tester.Directories
+namespace Tester.Directories;
+
+// Base tests for custom Grain Directory
+public abstract class GrainDirectoryTests where TGrainDirectory : IGrainDirectory
{
- // Base tests for custom Grain Directory
- public abstract class GrainDirectoryTests where T : IGrainDirectory
+ protected readonly ILoggerFactory loggerFactory;
+ private TGrainDirectory? _directory;
+
+ protected GrainDirectoryTests(ITestOutputHelper testOutput)
{
- protected T grainDirectory;
- protected readonly ILoggerFactory loggerFactory;
+ this.loggerFactory = new LoggerFactory();
+ this.loggerFactory.AddProvider(new XunitLoggerProvider(testOutput));
+ }
+
+ protected TGrainDirectory GrainDirectory => _directory ??= CreateGrainDirectory();
- protected GrainDirectoryTests(ITestOutputHelper testOutput)
+ protected abstract TGrainDirectory CreateGrainDirectory();
+
+ [SkippableFact]
+ public async Task RegisterLookupUnregisterLookup()
+ {
+ var expected = new GrainAddress
{
- this.loggerFactory = new LoggerFactory();
- this.loggerFactory.AddProvider(new XunitLoggerProvider(testOutput));
- this.grainDirectory = GetGrainDirectory();
- }
+ ActivationId = ActivationId.NewId(),
+ GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
+ MembershipVersion = new MembershipVersion(51)
+ };
+
+ Assert.Equal(expected, await GrainDirectory.Register(expected, null));
+
+ Assert.Equal(expected, await GrainDirectory.Lookup(expected.GrainId));
- protected abstract T GetGrainDirectory();
+ await GrainDirectory.Unregister(expected);
- [SkippableFact]
- public async Task RegisterLookupUnregisterLookup()
+ Assert.Null(await GrainDirectory.Lookup(expected.GrainId));
+ }
+
+ [SkippableFact]
+ public async Task DoNotOverwriteEntry()
+ {
+ var expected = new GrainAddress
{
- var expected = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
- SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
- MembershipVersion = new MembershipVersion(51)
- };
+ ActivationId = ActivationId.NewId(),
+ GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
+ MembershipVersion = new MembershipVersion(51)
+ };
- Assert.Equal(expected, await this.grainDirectory.Register(expected, null));
+ var differentActivation = new GrainAddress
+ {
+ ActivationId = ActivationId.NewId(),
+ GrainId = expected.GrainId,
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
+ MembershipVersion = new MembershipVersion(51)
+ };
- Assert.Equal(expected, await this.grainDirectory.Lookup(expected.GrainId));
+ var differentSilo = new GrainAddress
+ {
+ ActivationId = expected.ActivationId,
+ GrainId = expected.GrainId,
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.14:1000@4583"),
+ MembershipVersion = new MembershipVersion(51)
+ };
- await this.grainDirectory.Unregister(expected);
+ Assert.Equal(expected, await GrainDirectory.Register(expected, null));
+ Assert.Equal(expected, await GrainDirectory.Register(differentActivation, null));
+ Assert.Equal(expected, await GrainDirectory.Register(differentSilo, null));
- Assert.Null(await this.grainDirectory.Lookup(expected.GrainId));
- }
+ Assert.Equal(expected, await GrainDirectory.Lookup(expected.GrainId));
+ }
- [SkippableFact]
- public async Task DoNotOverwriteEntry()
+    /// <summary>
+    /// Overwrite an existing entry if the register call includes a matching "previousAddress" parameter.
+    /// </summary>
+ [SkippableFact]
+ public async Task OverwriteEntryIfMatch()
+ {
+ var initial = new GrainAddress
{
- var expected = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
- SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
- MembershipVersion = new MembershipVersion(51)
- };
-
- var differentActivation = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = expected.GrainId,
- SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
- MembershipVersion = new MembershipVersion(51)
- };
-
- var differentSilo = new GrainAddress
- {
- ActivationId = expected.ActivationId,
- GrainId = expected.GrainId,
- SiloAddress = SiloAddress.FromParsableString("10.0.23.14:1000@4583"),
- MembershipVersion = new MembershipVersion(51)
- };
-
- Assert.Equal(expected, await this.grainDirectory.Register(expected, null));
- Assert.Equal(expected, await this.grainDirectory.Register(differentActivation, null));
- Assert.Equal(expected, await this.grainDirectory.Register(differentSilo, null));
-
- Assert.Equal(expected, await this.grainDirectory.Lookup(expected.GrainId));
- }
-
-        /// <summary>
-        /// Overwrite an existing entry if the register call includes a matching "previousAddress" parameter.
-        /// </summary>
- [SkippableFact]
- public async Task OverwriteEntryIfMatch()
+ ActivationId = ActivationId.NewId(),
+ GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
+ MembershipVersion = new MembershipVersion(51)
+ };
+
+ var differentActivation = new GrainAddress
+ {
+ ActivationId = ActivationId.NewId(),
+ GrainId = initial.GrainId,
+ SiloAddress = initial.SiloAddress,
+ MembershipVersion = initial.MembershipVersion
+ };
+
+ var differentSilo = new GrainAddress
{
- var initial = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
- SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
- MembershipVersion = new MembershipVersion(51)
- };
-
- var differentActivation = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = initial.GrainId,
- SiloAddress = initial.SiloAddress,
- MembershipVersion = initial.MembershipVersion
- };
-
- var differentSilo = new GrainAddress
- {
- ActivationId = initial.ActivationId,
- GrainId = initial.GrainId,
- SiloAddress = SiloAddress.FromParsableString("10.0.23.14:1000@4583"),
- MembershipVersion = initial.MembershipVersion
- };
-
- // Success, no registration exists, so the previous address is ignored.
- Assert.Equal(initial, await this.grainDirectory.Register(initial, differentSilo));
-
- // Success, the previous address matches the existing registration.
- Assert.Equal(differentActivation, await this.grainDirectory.Register(differentActivation, initial));
-
- // Failure, the previous address does not match the existing registration.
- Assert.Equal(differentActivation, await this.grainDirectory.Register(differentSilo, initial));
-
- Assert.Equal(differentActivation, await this.grainDirectory.Lookup(initial.GrainId));
- }
-
- [SkippableFact]
- public async Task DoNotDeleteDifferentActivationIdEntry()
+ ActivationId = initial.ActivationId,
+ GrainId = initial.GrainId,
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.14:1000@4583"),
+ MembershipVersion = initial.MembershipVersion
+ };
+
+ // Success, no registration exists, so the previous address is ignored.
+ Assert.Equal(initial, await GrainDirectory.Register(initial, differentSilo));
+
+ // Success, the previous address matches the existing registration.
+ Assert.Equal(differentActivation, await GrainDirectory.Register(differentActivation, initial));
+
+ // Failure, the previous address does not match the existing registration.
+ Assert.Equal(differentActivation, await GrainDirectory.Register(differentSilo, initial));
+
+ Assert.Equal(differentActivation, await GrainDirectory.Lookup(initial.GrainId));
+ }
+
+ [SkippableFact]
+ public async Task DoNotDeleteDifferentActivationIdEntry()
+ {
+ var expected = new GrainAddress
{
- var expected = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
- SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
- MembershipVersion = new MembershipVersion(51)
- };
-
- var otherEntry = new GrainAddress
- {
- ActivationId = ActivationId.NewId(),
- GrainId = expected.GrainId,
- SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
- MembershipVersion = new MembershipVersion(51)
- };
-
- Assert.Equal(expected, await this.grainDirectory.Register(expected, null));
- await this.grainDirectory.Unregister(otherEntry);
- Assert.Equal(expected, await this.grainDirectory.Lookup(expected.GrainId));
- }
-
- [SkippableFact]
- public async Task LookupNotFound()
+ ActivationId = ActivationId.NewId(),
+ GrainId = GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N")),
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
+ MembershipVersion = new MembershipVersion(51)
+ };
+
+ var otherEntry = new GrainAddress
{
- Assert.Null(await this.grainDirectory.Lookup(GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N"))));
- }
+ ActivationId = ActivationId.NewId(),
+ GrainId = expected.GrainId,
+ SiloAddress = SiloAddress.FromParsableString("10.0.23.12:1000@5678"),
+ MembershipVersion = new MembershipVersion(51)
+ };
+
+ Assert.Equal(expected, await GrainDirectory.Register(expected, null));
+ await GrainDirectory.Unregister(otherEntry);
+ Assert.Equal(expected, await GrainDirectory.Lookup(expected.GrainId));
+ }
+
+ [SkippableFact]
+ public async Task LookupNotFound()
+ {
+ Assert.Null(await GrainDirectory.Lookup(GrainId.Parse("user/somerandomuser_" + Guid.NewGuid().ToString("N"))));
}
}
diff --git a/test/TesterInternal/General/ConsistentRingProviderTests_Silo.cs b/test/TesterInternal/General/ConsistentRingProviderTests_Silo.cs
index b11bfc1349..149da1f0c8 100644
--- a/test/TesterInternal/General/ConsistentRingProviderTests_Silo.cs
+++ b/test/TesterInternal/General/ConsistentRingProviderTests_Silo.cs
@@ -2,7 +2,6 @@
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.Logging;
using Orleans.Configuration;
-using Orleans.Runtime;
using Orleans.Runtime.ReminderService;
using Orleans.TestingHost;
using TestExtensions;
@@ -19,7 +18,7 @@ public class ConsistentRingProviderTests_Silo : TestClusterPerTest
private readonly TimeSpan endWait = TimeSpan.FromMinutes(5);
private enum Fail { First, Random, Last }
-
+
protected override void ConfigureTestCluster(TestClusterBuilder builder)
{
builder.AddSiloBuilderConfigurator();
@@ -157,7 +156,7 @@ public async Task Ring_1F1J()
// kill a silo and join a new one in parallel
logger.LogInformation("Killing silo {SiloAddress} and joining a silo", failures[0].SiloAddress);
-
+
var tasks = new Task[2]
{
Task.Factory.StartNew(() => this.HostedCluster.StopSiloAsync(failures[0])),
@@ -282,7 +281,7 @@ private async Task> getSilosToFail(Fail fail, int numOfFailures
await tableGrain.ReadRows(tableGrainId);
SiloAddress reminderTableGrainPrimaryDirectoryAddress = (await TestUtils.GetDetailedGrainReport(this.HostedCluster.InternalGrainFactory, tableGrainId, this.HostedCluster.Primary)).PrimaryForGrain;
- // ask a detailed report from the directory partition owner, and get the actionvation addresses
+ // ask a detailed report from the directory partition owner, and get the activation addresses
var address = (await TestUtils.GetDetailedGrainReport(this.HostedCluster.InternalGrainFactory, tableGrainId, this.HostedCluster.GetSiloForAddress(reminderTableGrainPrimaryDirectoryAddress))).LocalDirectoryActivationAddress;
GrainAddress reminderGrainActivation = address;
diff --git a/test/TesterInternal/GrainDirectory/DistributedGrainDirectoryTests.cs b/test/TesterInternal/GrainDirectory/DistributedGrainDirectoryTests.cs
new file mode 100644
index 0000000000..092c64de4a
--- /dev/null
+++ b/test/TesterInternal/GrainDirectory/DistributedGrainDirectoryTests.cs
@@ -0,0 +1,28 @@
+#nullable enable
+using Microsoft.Extensions.DependencyInjection;
+using Orleans.GrainDirectory;
+using Orleans.Runtime.GrainDirectory;
+using Orleans.TestingHost;
+using Tester.Directories;
+using TestExtensions;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace UnitTests.GrainDirectory;
+
+/// <summary>
+/// Runs the <see cref="GrainDirectoryTests{T}"/> suite against the default grain directory of the primary silo in the shared default test cluster.
+/// </summary>
+[TestCategory("BVT"), TestCategory("Directory")]
+public sealed class DefaultGrainDirectoryTests(DefaultClusterFixture fixture, ITestOutputHelper output)
+    : GrainDirectoryTests<IGrainDirectory>(output), IClassFixture<DefaultClusterFixture>
+{
+    private readonly TestCluster _testCluster = fixture.HostedCluster;
+    private InProcessSiloHandle Primary => (InProcessSiloHandle)_testCluster.Primary;
+
+    /// <summary>
+    /// Resolves the directory under test from the primary silo's service provider.
+    /// </summary>
+    protected override IGrainDirectory CreateGrainDirectory() =>
+        Primary.SiloHost.Services.GetRequiredService<GrainDirectoryResolver>().DefaultGrainDirectory;
+}
diff --git a/test/TesterInternal/GrainDirectory/GrainDirectoryResilienceTests.cs b/test/TesterInternal/GrainDirectory/GrainDirectoryResilienceTests.cs
new file mode 100644
index 0000000000..b026061baf
--- /dev/null
+++ b/test/TesterInternal/GrainDirectory/GrainDirectoryResilienceTests.cs
@@ -0,0 +1,231 @@
+#nullable enable
+using System.Diagnostics;
+using System.Globalization;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+using Orleans.Configuration;
+using Orleans.Runtime.GrainDirectory;
+using Orleans.Serialization;
+using Orleans.Storage;
+using Orleans.TestingHost;
+using Xunit;
+using Xunit.Abstractions;
+
+namespace UnitTests.GrainDirectory;
+
+/// <summary>
+/// Grain interface used by <see cref="GrainDirectoryResilienceTests"/> to generate load.
+/// </summary>
+internal interface IMyDirectoryTestGrain : IGrainWithIntegerKey
+{
+    /// <summary>No-op call; the load loop invokes it on a fresh batch of grain ids each iteration.</summary>
+    ValueTask Ping();
+}
+
+/// <summary>
+/// Trivial <see cref="IMyDirectoryTestGrain"/> implementation.
+/// </summary>
+// NOTE(review): the short collection age limit presumably keeps activations churning during the run - confirm.
+[CollectionAgeLimit(Minutes = 1.01)]
+internal class MyDirectoryTestGrain : Grain, IMyDirectoryTestGrain
+{
+    /// <inheritdoc/>
+    public ValueTask Ping() => default;
+}
+
+[TestCategory("SlowBVT"), TestCategory("Directory")]
+public sealed class GrainDirectoryResilienceTests
+{
+    /// <summary>
+    /// Cluster chaos test: tests directory functionality and integrity while starting/stopping/killing silos frequently.
+    /// </summary>
+    /// <returns>A task which completes once the run has ended and the test cluster has been torn down.</returns>
+    [Fact]
+    public async Task ElasticChaos()
+    {
+        var testClusterBuilder = new TestClusterBuilder(1);
+        testClusterBuilder.AddSiloBuilderConfigurator<SiloBuilderConfigurator>();
+        var testCluster = testClusterBuilder.Build();
+        await testCluster.DeployAsync();
+
+        try
+        {
+            var log = testCluster.ServiceProvider.GetRequiredService<ILogger<GrainDirectoryResilienceTests>>();
+            log.LogInformation("ServiceId: '{ServiceId}'", testCluster.Options.ServiceId);
+            log.LogInformation("ClusterId: '{ClusterId}'.", testCluster.Options.ClusterId);
+
+            // The run is time-boxed: both loops below exit once this source is cancelled.
+            using var cts = new CancellationTokenSource(TimeSpan.FromMinutes(5));
+            var reconfigurationTimer = CoarseStopwatch.StartNew();
+            var upperLimit = 10;
+            var lowerLimit = 1; // Membership is kept on the primary, so we can't go below 1
+            var target = upperLimit;
+            var idBase = 0L;
+            var client = ((InProcessSiloHandle)testCluster.Primary).SiloHost.Services.GetRequiredService<IGrainFactory>();
+            const int CallsPerIteration = 100;
+
+            // Load: continuously call batches of grain ids which have not been used before.
+            var loadTask = Task.Run(async () =>
+            {
+                while (!cts.IsCancellationRequested)
+                {
+                    var calls = Enumerable.Range(0, CallsPerIteration)
+                        .Select(i => client.GetGrain<IMyDirectoryTestGrain>(idBase + i).Ping().AsTask())
+                        .ToList();
+
+                    try
+                    {
+                        await Task.WhenAll(calls);
+                    }
+                    catch (SiloUnavailableException sue)
+                    {
+                        log.LogInformation(sue, "Swallowed transient exception.");
+                    }
+                    catch (OrleansMessageRejectionException omre)
+                    {
+                        log.LogInformation(omre, "Swallowed rejection.");
+                    }
+                    catch (Exception exception)
+                    {
+                        log.LogError(exception, "Unhandled exception.");
+                        throw;
+                    }
+
+                    idBase += CallsPerIteration;
+                }
+            });
+
+            // Chaos: every 10 seconds check every directory partition, then start, stop, or kill a silo.
+            var chaosTask = Task.Run(async () =>
+            {
+                var clusterOperation = Task.CompletedTask;
+                while (!cts.IsCancellationRequested)
+                {
+                    try
+                    {
+                        var remaining = TimeSpan.FromSeconds(10) - reconfigurationTimer.Elapsed;
+                        if (remaining <= TimeSpan.Zero)
+                        {
+                            reconfigurationTimer.Restart();
+
+                            // Forget the previous operation before observing it: awaiting a faulted task rethrows
+                            // every time, so retaining it would turn one failure into a failure of every later cycle.
+                            var previousOperation = clusterOperation;
+                            clusterOperation = Task.CompletedTask;
+                            await previousOperation;
+
+                            // Check integrity
+                            var integrityChecks = new List<Task>();
+                            foreach (var silo in testCluster.Silos)
+                            {
+                                var address = silo.SiloAddress;
+                                for (var partitionIndex = 0; partitionIndex < DirectoryMembershipSnapshot.PartitionsPerSilo; partitionIndex++)
+                                {
+                                    var replica = ((IInternalGrainFactory)client).GetSystemTarget<IGrainDirectoryReplicaTestHooks>(GrainDirectoryReplica.CreateGrainId(address, partitionIndex).GrainId);
+                                    integrityChecks.Add(replica.CheckIntegrityAsync().AsTask());
+                                }
+                            }
+
+                            await Task.WhenAll(integrityChecks);
+
+                            clusterOperation = Task.Run(async () =>
+                            {
+                                var currentCount = testCluster.Silos.Count;
+
+                                if (currentCount > target)
+                                {
+                                    // Stop or kill a random silo, but not the primary (since that hosts cluster membership)
+                                    var victim = testCluster.SecondarySilos[Random.Shared.Next(testCluster.SecondarySilos.Count)];
+                                    if (currentCount % 2 == 0)
+                                    {
+                                        log.LogInformation("Stopping '{Silo}'.", victim.SiloAddress);
+                                        await testCluster.StopSiloAsync(victim);
+                                        log.LogInformation("Stopped '{Silo}'.", victim.SiloAddress);
+                                    }
+                                    else
+                                    {
+                                        log.LogInformation("Killing '{Silo}'.", victim.SiloAddress);
+                                        await testCluster.KillSiloAsync(victim);
+                                        log.LogInformation("Killed '{Silo}'.", victim.SiloAddress);
+                                    }
+                                }
+                                else if (currentCount < target)
+                                {
+                                    log.LogInformation("Starting new silo.");
+                                    var result = await testCluster.StartAdditionalSiloAsync();
+                                    log.LogInformation("Started '{Silo}'.", result.SiloAddress);
+                                }
+
+                                if (currentCount <= lowerLimit)
+                                {
+                                    target = upperLimit;
+                                }
+                                else if (currentCount >= upperLimit)
+                                {
+                                    target = lowerLimit;
+                                }
+                            });
+                        }
+                        else
+                        {
+                            await Task.Delay(remaining);
+                        }
+                    }
+                    catch (Exception exception)
+                    {
+                        log.LogInformation(exception, "Ignoring chaos exception.");
+                    }
+                }
+
+                // Drain the operation which is still in flight so that it cannot race with cluster teardown.
+                try
+                {
+                    await clusterOperation;
+                }
+                catch (Exception exception)
+                {
+                    log.LogInformation(exception, "Ignoring chaos exception.");
+                }
+            });
+
+            try
+            {
+                // Whichever loop ends first, normally or by faulting, ends the run; a fault is rethrown here.
+                await await Task.WhenAny(loadTask, chaosTask);
+            }
+            finally
+            {
+                cts.Cancel();
+                await Task.WhenAll(loadTask, chaosTask);
+            }
+        }
+        finally
+        {
+            // Always tear the cluster down, even when the run failed, so that no silos are leaked.
+            try
+            {
+                await testCluster.StopAllSilosAsync();
+            }
+            finally
+            {
+                await testCluster.DisposeAsync();
+            }
+        }
+    }
+
+    /// <summary>
+    /// Raises the silo response timeouts to two minutes and enables the distributed grain directory.
+    /// </summary>
+    private class SiloBuilderConfigurator : ISiloConfigurator
+    {
+        /// <inheritdoc/>
+        public void Configure(ISiloBuilder siloBuilder)
+        {
+            siloBuilder.Configure<SiloMessagingOptions>(o => o.ResponseTimeout = o.SystemResponseTimeout = TimeSpan.FromMinutes(2));
+#pragma warning disable ORLEANSEXP002 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+            siloBuilder.AddDistributedGrainDirectory();
+#pragma warning restore ORLEANSEXP002 // Type is for evaluation purposes only and is subject to change or removal in future updates. Suppress this diagnostic to proceed.
+        }
+    }
+}
+
diff --git a/test/TesterInternal/GrainDirectoryPartitionTests.cs b/test/TesterInternal/GrainDirectoryPartitionTests.cs
index 1e9f373295..6436c00526 100644
--- a/test/TesterInternal/GrainDirectoryPartitionTests.cs
+++ b/test/TesterInternal/GrainDirectoryPartitionTests.cs
@@ -11,7 +11,7 @@ namespace UnitTests;
[TestCategory("BVT"), TestCategory("GrainDirectory")]
public class GrainDirectoryPartitionTests
{
- private readonly GrainDirectoryPartition _target;
+ private readonly LocalGrainDirectoryPartition _target;
private readonly MockSiloStatusOracle _siloStatusOracle;
private static readonly SiloAddress LocalSiloAddress = SiloAddress.FromParsableString("127.0.0.1:11111@123");
private static readonly SiloAddress OtherSiloAddress = SiloAddress.FromParsableString("127.0.0.2:11111@456");
@@ -19,7 +19,7 @@ public class GrainDirectoryPartitionTests
public GrainDirectoryPartitionTests()
{
_siloStatusOracle = new MockSiloStatusOracle();
- _target = new GrainDirectoryPartition(
+ _target = new LocalGrainDirectoryPartition(
_siloStatusOracle,
Options.Create(new GrainDirectoryOptions()),
new LoggerFactory());
diff --git a/test/TesterInternal/LivenessTests/ConsistentRingProviderTests.cs b/test/TesterInternal/LivenessTests/ConsistentRingProviderTests.cs
index d4c536251c..035063eead 100644
--- a/test/TesterInternal/LivenessTests/ConsistentRingProviderTests.cs
+++ b/test/TesterInternal/LivenessTests/ConsistentRingProviderTests.cs
@@ -1,44 +1,31 @@
+using System.Collections.Immutable;
+using System.Net;
using Microsoft.Extensions.Logging.Abstractions;
using Orleans.Configuration;
-using Orleans.Runtime;
using Orleans.Runtime.ConsistentRing;
using Orleans.Streams;
+using TestExtensions;
using Xunit;
using Xunit.Abstractions;
-using TestExtensions;
-using System.Net;
-using System.Collections.Immutable;
namespace UnitTests.LivenessTests
{
- public class ConsistentRingProviderTests : IClassFixture
+ public class ConsistentRingProviderTests(ITestOutputHelper output)
{
- private readonly ITestOutputHelper output;
-
- public class Fixture
- {
- public Fixture()
- {
- }
- }
-
- public ConsistentRingProviderTests(ITestOutputHelper output)
- {
- this.output = output;
- }
+ private readonly ITestOutputHelper _output = output;
[Fact, TestCategory("Functional"), TestCategory("Liveness"), TestCategory("Ring"), TestCategory("RingStandalone")]
public void ConsistentRingProvider_Test1()
{
SiloAddress silo1 = SiloAddressUtils.NewLocalSiloAddress(0);
ConsistentRingProvider ring = new ConsistentRingProvider(silo1, NullLoggerFactory.Instance, new FakeSiloStatusOracle());
- output.WriteLine("Silo1 range: {0}. The whole ring is: {1}", ring.GetMyRange(), ring.ToString());
+ _output.WriteLine("Silo1 range: {0}. The whole ring is: {1}", ring.GetMyRange(), ring.ToString());
ring.AddServer(SiloAddressUtils.NewLocalSiloAddress(1));
- output.WriteLine("Silo1 range: {0}. The whole ring is: {1}", ring.GetMyRange(), ring.ToString());
+ _output.WriteLine("Silo1 range: {0}. The whole ring is: {1}", ring.GetMyRange(), ring.ToString());
ring.AddServer(SiloAddressUtils.NewLocalSiloAddress(2));
- output.WriteLine("Silo1 range: {0}. The whole ring is: {1}", ring.GetMyRange(), ring.ToString());
+ _output.WriteLine("Silo1 range: {0}. The whole ring is: {1}", ring.GetMyRange(), ring.ToString());
}
[Fact, TestCategory("Functional"), TestCategory("Liveness"), TestCategory("Ring"), TestCategory("RingStandalone")]
@@ -46,12 +33,12 @@ public void ConsistentRingProvider_Test2()
{
SiloAddress silo1 = SiloAddressUtils.NewLocalSiloAddress(0);
VirtualBucketsRingProvider ring = new VirtualBucketsRingProvider(silo1, NullLoggerFactory.Instance, 30, new FakeSiloStatusOracle());
- output.WriteLine("\n\n*** Silo1 range: {0}.\n*** The whole ring with 1 silo is:\n{1}\n\n", ring.GetMyRange(), ring.ToString());
+ _output.WriteLine("\n\n*** Silo1 range: {0}.\n*** The whole ring with 1 silo is:\n{1}\n\n", ring.GetMyRange(), ring.ToString());
for (int i = 1; i <= 10; i++)
{
ring.SiloStatusChangeNotification(SiloAddressUtils.NewLocalSiloAddress(i), SiloStatus.Active);
- output.WriteLine("\n\n*** Silo1 range: {0}.\n*** The whole ring with {1} silos is:\n{2}\n\n", ring.GetMyRange(), i + 1, ring.ToString());
+ _output.WriteLine("\n\n*** Silo1 range: {0}.\n*** The whole ring with {1} silos is:\n{2}\n\n", ring.GetMyRange(), i + 1, ring.ToString());
}
}
@@ -65,12 +52,12 @@ public void ConsistentRingProvider_Test3()
Random random = new Random();
SiloAddress silo1 = SiloAddressUtils.NewLocalSiloAddress(random.Next(100000));
VirtualBucketsRingProvider ring = new VirtualBucketsRingProvider(silo1, NullLoggerFactory.Instance, 50, new FakeSiloStatusOracle());
-
+
for (int i = 1; i <= NUM_SILOS - 1; i++)
{
ring.SiloStatusChangeNotification(SiloAddressUtils.NewLocalSiloAddress(random.Next(100000)), SiloStatus.Active);
}
-
+
var siloRanges = ring.GetRanges();
var sortedSiloRanges = siloRanges.ToList();
sortedSiloRanges.Sort((t1, t2) => t1.Item2.RangePercentage().CompareTo(t2.Item2.RangePercentage()));
@@ -79,7 +66,7 @@ public void ConsistentRingProvider_Test3()
foreach (var siloRange in siloRanges)
{
List agentRanges = new List();
- for(int i=0; i < NUM_AGENTS; i++)
+ for (int i = 0; i < NUM_AGENTS; i++)
{
IRingRangeInternal agentRange = (IRingRangeInternal)RangeFactory.GetEquallyDividedSubRange(siloRange.Value, NUM_AGENTS, i);
agentRanges.Add(agentRange);
@@ -89,18 +76,18 @@ public void ConsistentRingProvider_Test3()
Dictionary> queueHistogram = GetQueueHistogram(allAgentRanges, (int)NUM_QUEUES);
string str = Utils.EnumerableToString(sortedSiloRanges,
- tuple => string.Format("Silo {0} -> Range {1:0.000}%, {2} queues: {3}",
+ tuple => string.Format("Silo {0} -> Range {1:0.000}%, {2} queues: {3}",
tuple.Item1,
tuple.Item2.RangePercentage(),
queueHistogram[tuple.Item1].Sum(),
Utils.EnumerableToString(queueHistogram[tuple.Item1])), "\n");
- output.WriteLine("\n\n*** The whole ring with {0} silos is:\n{1}\n\n", NUM_SILOS, str);
+ _output.WriteLine("\n\n*** The whole ring with {0} silos is:\n{1}\n\n", NUM_SILOS, str);
- output.WriteLine("Total number of queues is: {0}", queueHistogram.Values.Sum(list => list.Sum()));
- output.WriteLine("Expected average range per silo is: {0:0.00}%, expected #queues per silo is: {1:0.00}, expected #queues per agent is: {2:0.000}.",
+ _output.WriteLine("Total number of queues is: {0}", queueHistogram.Values.Sum(list => list.Sum()));
+ _output.WriteLine("Expected average range per silo is: {0:0.00}%, expected #queues per silo is: {1:0.00}, expected #queues per agent is: {2:0.000}.",
100.0 / NUM_SILOS, NUM_QUEUES / NUM_SILOS, NUM_QUEUES / (NUM_SILOS * NUM_AGENTS));
- output.WriteLine("Min #queues per silo is: {0}, Max #queues per silo is: {1}.",
+ _output.WriteLine("Min #queues per silo is: {0}, Max #queues per silo is: {1}.",
queueHistogram.Values.Min(list => list.Sum()), queueHistogram.Values.Max(list => list.Sum()));
}
@@ -182,7 +169,7 @@ public bool TryGetSiloName(SiloAddress siloAddress, out string siloName)
}
public bool UnSubscribeFromSiloStatusEvents(ISiloStatusListener observer) => _subscribers.Remove(observer);
- public ImmutableArray GetActiveSilos() => [.. GetApproximateSiloStatuses(onlyActive: true).Keys];
+ public ImmutableArray GetActiveSilos() => [.. GetApproximateSiloStatuses(onlyActive: true).Keys];
}
}
}
diff --git a/test/Transactions/Orleans.Transactions.Tests/Hosting/TransactionTestExtensions.cs b/test/Transactions/Orleans.Transactions.Tests/Hosting/TransactionTestExtensions.cs
index 92cda04554..bfc89aaf89 100644
--- a/test/Transactions/Orleans.Transactions.Tests/Hosting/TransactionTestExtensions.cs
+++ b/test/Transactions/Orleans.Transactions.Tests/Hosting/TransactionTestExtensions.cs
@@ -7,18 +7,18 @@ public static class TransactionTestExtensions
{
public static ISiloBuilder ConfigureTracingForTransactionTests(this ISiloBuilder clientBuilder)
{
- clientBuilder.Services.ConfiguretracingForTransactionTests();
+ clientBuilder.Services.ConfigureTracingForTransactionTests();
return clientBuilder;
}
public static IClientBuilder ConfigureTracingForTransactionTests(this IClientBuilder clientBuilder)
{
- clientBuilder.Services.ConfiguretracingForTransactionTests();
+ clientBuilder.Services.ConfigureTracingForTransactionTests();
return clientBuilder;
}
// control the tracing of the various components of the transaction mechanism
- public static IServiceCollection ConfiguretracingForTransactionTests(this IServiceCollection services)
+ public static IServiceCollection ConfigureTracingForTransactionTests(this IServiceCollection services)
{
return services.AddLogging(loggingBuilder =>
{