Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make reading Kubeconfig from DCP resilient #3132

Merged
merged 12 commits into from
Mar 26, 2024
2 changes: 1 addition & 1 deletion playground/TestShop/AppHost/appsettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning",
"Aspire.Hosting.Dcp": "Warning"
"Aspire.Hosting.Dcp": "Debug"
}
}
}
7 changes: 7 additions & 0 deletions src/Aspire.Hosting/Dcp/DcpOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,10 @@ internal sealed class DcpOptions
/// </summary>
public bool? RandomizePorts { get; set; }

public int KubernetesConfigReadRetryCount { get; set; } = 10;

public int KubernetesConfigReadRetryIntervalSeconds { get; set; } = 1;

public void ApplyApplicationConfiguration(DistributedApplicationOptions appOptions, IConfiguration dcpPublisherConfiguration, IConfiguration publishingConfiguration, IConfiguration coreConfiguration)
{
string? publisher = publishingConfiguration[nameof(PublishingOptions.Publisher)];
Expand Down Expand Up @@ -125,6 +129,9 @@ public void ApplyApplicationConfiguration(DistributedApplicationOptions appOptio
DependencyCheckTimeout = coreConfiguration.GetValue<int>("DOTNET_ASPIRE_DEPENDENCY_CHECK_TIMEOUT", DependencyCheckTimeout);
}

KubernetesConfigReadRetryCount = dcpPublisherConfiguration.GetValue<int>(nameof(KubernetesConfigReadRetryCount), KubernetesConfigReadRetryCount);
KubernetesConfigReadRetryIntervalSeconds = dcpPublisherConfiguration.GetValue<int>(nameof(KubernetesConfigReadRetryIntervalSeconds), KubernetesConfigReadRetryIntervalSeconds);

if (!string.IsNullOrEmpty(dcpPublisherConfiguration[nameof(ResourceNameSuffix)]))
{
ResourceNameSuffix = dcpPublisherConfiguration[nameof(ResourceNameSuffix)];
Expand Down
50 changes: 46 additions & 4 deletions src/Aspire.Hosting/Dcp/KubernetesService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
using k8s;
using k8s.Exceptions;
using k8s.Models;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
using Polly;
using Polly.Retry;

namespace Aspire.Hosting.Dcp;

Expand Down Expand Up @@ -39,7 +43,7 @@ Task<Stream> GetLogStreamAsync<T>(
CancellationToken cancellationToken = default) where T : CustomResource;
}

internal sealed class KubernetesService(Locations locations) : IKubernetesService, IDisposable
internal sealed class KubernetesService(ILogger<KubernetesService> logger, IOptions<DcpOptions> dcpOptions, Locations locations) : IKubernetesService, IDisposable
{
private static readonly TimeSpan s_initialRetryDelay = TimeSpan.FromMilliseconds(100);
private static GroupVersion GroupVersion => Model.Dcp.GroupVersion;
Expand Down Expand Up @@ -280,16 +284,54 @@ private async Task<TResult> ExecuteWithRetry<TResult>(

private static bool IsRetryable(Exception ex) => ex is HttpRequestException || ex is KubeConfigException;

private readonly object _ensureKubernetesLock = new object();

private void EnsureKubernetes()
{
if (_kubernetes != null) { return; }

lock (Model.Dcp.Schema)
lock (_ensureKubernetesLock)
{
if (_kubernetes != null) { return; }

var config = KubernetesClientConfiguration.BuildConfigFromConfigFile(kubeconfigPath: locations.DcpKubeconfigPath, useRelativePaths: false);
_kubernetes = new DcpKubernetesClient(config);
// This retry was created in relation to this comment in GitHub:
//
// https://github.com/dotnet/aspire/issues/2422#issuecomment-2016701083
//
// It looks like it is possible for us to attempt to read the file before it is written/finished
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
// being written. We rely on DCP to write the configuration file but it may happen in parallel to
// this code executing, so it's possible the file does not exist, or is still being written by
// the time we get to it.
//
// This retry will retry reading the file 10 times (by default, but configurable) with a pause
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
// of 1 second between each attempt. This means it could take up to 10 seconds to fail. We emit
// debug level logs for each retry attempt should we need to help a customer debug this.
var configurationReadRetry = new RetryStrategyOptions()
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
{
ShouldHandle = new PredicateBuilder().Handle<IOException>(e => e.Message.StartsWith("The process cannot access the file")),
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
BackoffType = DelayBackoffType.Constant,
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
MaxRetryAttempts = dcpOptions.Value.KubernetesConfigReadRetryCount,
MaxDelay = TimeSpan.FromSeconds(dcpOptions.Value.KubernetesConfigReadRetryIntervalSeconds),
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
OnRetry = (retry) =>
{
logger.LogDebug(
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved
retry.Outcome.Exception,
"Reading Kubernetes configuration file from '{DcpKubeconfigPath}' failed. Retrying. (iteration {Iteration}).",
locations.DcpKubeconfigPath,
retry.AttemptNumber
);
return ValueTask.CompletedTask;
}
};
var pipeline = new ResiliencePipelineBuilder().AddRetry(configurationReadRetry).Build();
mitchdenny marked this conversation as resolved.
Show resolved Hide resolved

pipeline.Execute(() =>
{
logger.LogDebug("Reading Kubernetes configuration from '{DcpKubeconfigPath}' on thread {ThreadId}.", locations.DcpKubeconfigPath, Environment.CurrentManagedThreadId);
var config = KubernetesClientConfiguration.BuildConfigFromConfigFile(kubeconfigPath: locations.DcpKubeconfigPath, useRelativePaths: false);
logger.LogDebug("Successfully read Kubernetes configuration from '{DcpKubeconfigPath}'.", locations.DcpKubeconfigPath);
_kubernetes = new DcpKubernetesClient(config);
});
}
}
}