diff --git a/src/Aspire.Hosting/Dcp/DcpOptions.cs b/src/Aspire.Hosting/Dcp/DcpOptions.cs index 17646041a8..ff09df1d79 100644 --- a/src/Aspire.Hosting/Dcp/DcpOptions.cs +++ b/src/Aspire.Hosting/Dcp/DcpOptions.cs @@ -77,6 +77,10 @@ internal sealed class DcpOptions /// public bool? RandomizePorts { get; set; } + public int KubernetesConfigReadRetryCount { get; set; } = 300; + + public int KubernetesConfigReadRetryIntervalMilliseconds { get; set; } = 100; + public void ApplyApplicationConfiguration(DistributedApplicationOptions appOptions, IConfiguration dcpPublisherConfiguration, IConfiguration publishingConfiguration, IConfiguration coreConfiguration) { string? publisher = publishingConfiguration[nameof(PublishingOptions.Publisher)]; @@ -125,6 +129,9 @@ public void ApplyApplicationConfiguration(DistributedApplicationOptions appOptio DependencyCheckTimeout = coreConfiguration.GetValue("DOTNET_ASPIRE_DEPENDENCY_CHECK_TIMEOUT", DependencyCheckTimeout); } + KubernetesConfigReadRetryCount = dcpPublisherConfiguration.GetValue(nameof(KubernetesConfigReadRetryCount), KubernetesConfigReadRetryCount); + KubernetesConfigReadRetryIntervalMilliseconds = dcpPublisherConfiguration.GetValue(nameof(KubernetesConfigReadRetryIntervalMilliseconds), KubernetesConfigReadRetryIntervalMilliseconds); + if (!string.IsNullOrEmpty(dcpPublisherConfiguration[nameof(ResourceNameSuffix)])) { ResourceNameSuffix = dcpPublisherConfiguration[nameof(ResourceNameSuffix)]; diff --git a/src/Aspire.Hosting/Dcp/KubernetesService.cs b/src/Aspire.Hosting/Dcp/KubernetesService.cs index c96b761fcc..1d88327bdd 100644 --- a/src/Aspire.Hosting/Dcp/KubernetesService.cs +++ b/src/Aspire.Hosting/Dcp/KubernetesService.cs @@ -2,11 +2,16 @@ // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Immutable; +using System.Diagnostics; using System.Runtime.CompilerServices; using Aspire.Hosting.Dcp.Model; using k8s; using k8s.Exceptions; using k8s.Models; +using Microsoft.Extensions.Logging; +using Microsoft.Extensions.Options; +using Polly; +using Polly.Retry; namespace Aspire.Hosting.Dcp; @@ -39,7 +44,7 @@ Task GetLogStreamAsync( CancellationToken cancellationToken = default) where T : CustomResource; } -internal sealed class KubernetesService(Locations locations) : IKubernetesService, IDisposable +internal sealed class KubernetesService(ILogger logger, IOptions dcpOptions, Locations locations) : IKubernetesService, IDisposable { private static readonly TimeSpan s_initialRetryDelay = TimeSpan.FromMilliseconds(100); private static GroupVersion GroupVersion => Model.Dcp.GroupVersion; @@ -212,6 +217,7 @@ public Task GetLogStreamAsync( public void Dispose() { + _kubeconfigReadSemaphore?.Dispose(); _kubernetes?.Dispose(); } @@ -254,7 +260,7 @@ private async Task ExecuteWithRetry( { try { - EnsureKubernetes(); + await EnsureKubernetesAsync(cancellationToken).ConfigureAwait(false); return await operation(_kubernetes!).ConfigureAwait(false); } catch (Exception e) when (IsRetryable(e)) @@ -280,16 +286,80 @@ private async Task ExecuteWithRetry( private static bool IsRetryable(Exception ex) => ex is HttpRequestException || ex is KubeConfigException; - private void EnsureKubernetes() + private readonly SemaphoreSlim _kubeconfigReadSemaphore = new(1); + + private ResiliencePipeline? _resiliencePipeline; + + private ResiliencePipeline GetReadKubeconfigResiliencePipeline() { - if (_kubernetes != null) { return; } + if (_resiliencePipeline == null) + { + var configurationReadRetry = new RetryStrategyOptions() + { + ShouldHandle = new PredicateBuilder().Handle(), + BackoffType = DelayBackoffType.Constant, + MaxRetryAttempts = dcpOptions.Value.KubernetesConfigReadRetryCount, + MaxDelay = TimeSpan.FromMilliseconds(dcpOptions.Value.KubernetesConfigReadRetryIntervalMilliseconds), + OnRetry = (retry) => + { + logger.LogDebug( + "Waiting for Kubernetes configuration file at '{DcpKubeconfigPath}' (attempt {Iteration}).", + locations.DcpKubeconfigPath, + retry.AttemptNumber + ); + return ValueTask.CompletedTask; + } + }; - lock (Model.Dcp.Schema) + _resiliencePipeline = new ResiliencePipelineBuilder().AddRetry(configurationReadRetry).Build(); + } + + return _resiliencePipeline; + } + + private async Task EnsureKubernetesAsync(CancellationToken cancellationToken = default) + { + // Return early before waiting for the semaphore if we can. + if (_kubernetes != null) { - if (_kubernetes != null) { return; } + return; + } - var config = KubernetesClientConfiguration.BuildConfigFromConfigFile(kubeconfigPath: locations.DcpKubeconfigPath, useRelativePaths: false); - _kubernetes = new DcpKubernetesClient(config); + await _kubeconfigReadSemaphore.WaitAsync(-1, cancellationToken).ConfigureAwait(false); + + try + { + // Second chance shortcut if multiple threads got caught. + if (_kubernetes != null) + { + return; + } + + // We retry reading the kubeconfig file because DCP takes a few moments to write + // it to disk. This retry pipeline will only be invoked by a single thread the + // rest will be held at the semaphore. + var readStopwatch = new Stopwatch(); + readStopwatch.Start(); + + var pipeline = GetReadKubeconfigResiliencePipeline(); + _kubernetes = await pipeline.ExecuteAsync(async (cancellationToken) => + { + var fileInfo = new FileInfo(locations.DcpKubeconfigPath); + var config = await KubernetesClientConfiguration.BuildConfigFromConfigFileAsync(kubeconfig: fileInfo, useRelativePaths: false).ConfigureAwait(false); + readStopwatch.Stop(); + + logger.LogDebug( + "Successfully read Kubernetes configuration from '{DcpKubeconfigPath}' after {DurationMs} milliseconds.", + locations.DcpKubeconfigPath, + readStopwatch.ElapsedMilliseconds + ); + + return new DcpKubernetesClient(config); + }, cancellationToken).ConfigureAwait(false); + } + finally + { + _kubeconfigReadSemaphore.Release(); } } }