Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

extend metrics with retry-related instruments #2789

Merged
merged 3 commits into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions aws/retry/attempt_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package retry

import (
"context"

"github.com/aws/smithy-go/metrics"
"github.com/aws/smithy-go/middleware"
)

type attemptMetrics struct {
Attempts metrics.Int64Counter
Errors metrics.Int64Counter

AttemptDuration metrics.Float64Histogram
}

func newAttemptMetrics(meter metrics.Meter) (*attemptMetrics, error) {
m := &attemptMetrics{}
var err error

m.Attempts, err = meter.Int64Counter("client.call.attempts", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "{attempt}"
o.Description = "The number of attempts for an individual operation"
})
if err != nil {
return nil, err
}
m.Errors, err = meter.Int64Counter("client.call.errors", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "{error}"
o.Description = "The number of errors for an operation"
})
if err != nil {
return nil, err
}
m.AttemptDuration, err = meter.Float64Histogram("client.call.attempt_duration", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "The time it takes to connect to the service, send the request, and get back HTTP status code and headers (including time queued waiting to be sent)"
})
if err != nil {
return nil, err
}

return m, nil
}

func withOperationMetadata(ctx context.Context) metrics.RecordMetricOption {
return func(o *metrics.RecordMetricOptions) {
o.Properties.Set("rpc.service", middleware.GetServiceID(ctx))
o.Properties.Set("rpc.method", middleware.GetOperationName(ctx))
}
}
40 changes: 38 additions & 2 deletions aws/retry/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import (
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws/middleware/private/metrics"
privatemetrics "github.com/aws/aws-sdk-go-v2/aws/middleware/private/metrics"
internalcontext "github.com/aws/aws-sdk-go-v2/internal/context"

"github.com/aws/aws-sdk-go-v2/aws"
awsmiddle "github.com/aws/aws-sdk-go-v2/aws/middleware"
awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http"
"github.com/aws/aws-sdk-go-v2/internal/sdk"
"github.com/aws/smithy-go/logging"
"github.com/aws/smithy-go/metrics"
smithymiddle "github.com/aws/smithy-go/middleware"
"github.com/aws/smithy-go/tracing"
"github.com/aws/smithy-go/transport/http"
Expand All @@ -39,6 +41,9 @@ type Attempt struct {
// attempts are reached.
LogAttempts bool

// A Meter instance for recording retry-related metrics.
OperationMeter metrics.Meter

retryer aws.RetryerV2
requestCloner RequestCloner
}
Expand All @@ -56,6 +61,10 @@ func NewAttemptMiddleware(retryer aws.Retryer, requestCloner RequestCloner, optF
for _, fn := range optFns {
fn(m)
}
if m.OperationMeter == nil {
m.OperationMeter = metrics.NopMeterProvider{}.Meter("")
}

return m
}

Expand All @@ -81,6 +90,11 @@ func (r *Attempt) HandleFinalize(ctx context.Context, in smithymiddle.FinalizeIn
maxAttempts := r.retryer.MaxAttempts()
releaseRetryToken := nopRelease

retryMetrics, err := newAttemptMetrics(r.OperationMeter)
if err != nil {
return out, metadata, err
}

for {
attemptNum++
attemptInput := in
Expand All @@ -102,7 +116,19 @@ func (r *Attempt) HandleFinalize(ctx context.Context, in smithymiddle.FinalizeIn
attemptCtx, span := tracing.StartSpan(attemptCtx, "Attempt", func(o *tracing.SpanOptions) {
o.Properties.Set("operation.attempt", attemptNum)
})
retryMetrics.Attempts.Add(ctx, 1, withOperationMetadata(ctx))

start := sdk.NowTime()
out, attemptResult, releaseRetryToken, err = r.handleAttempt(attemptCtx, attemptInput, releaseRetryToken, next)
elapsed := sdk.NowTime().Sub(start)

retryMetrics.AttemptDuration.Record(ctx, float64(elapsed)/1e9, withOperationMetadata(ctx))
if err != nil {
retryMetrics.Errors.Add(ctx, 1, withOperationMetadata(ctx), func(o *metrics.RecordMetricOptions) {
o.Properties.Set("exception.type", errorType(err))
})
}

span.End()

attemptClockSkew, _ = awsmiddle.GetAttemptSkew(attemptResult.ResponseMetadata)
Expand Down Expand Up @@ -245,7 +271,7 @@ func (r *Attempt) handleAttempt(
// that time. Potentially early exist if the sleep is canceled via the
// context.
retryDelay, reqErr := r.retryer.RetryDelay(attemptNum, err)
mctx := metrics.Context(ctx)
mctx := privatemetrics.Context(ctx)
if mctx != nil {
attempt, err := mctx.Data().LatestAttempt()
if err != nil {
Expand Down Expand Up @@ -388,3 +414,13 @@ func AddRetryMiddlewares(stack *smithymiddle.Stack, options AddRetryMiddlewaresO
}
return nil
}

// deduces the modeled exception type from an attempt error, which is almost
// always wrapped
func errorType(err error) string {
var terr *awshttp.ResponseError
if errors.As(err, &terr) {
return fmt.Sprintf("%T", terr.Err)
}
return fmt.Sprintf("%T", err)
}
11 changes: 7 additions & 4 deletions aws/retry/middleware_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,16 @@ func TestAttemptMiddleware(t *testing.T) {
MaxAttempts: 3,
},
{
AttemptNum: 2,
AttemptTime: time.Date(2020, 8, 19, 10, 21, 30, 0, time.UTC),
AttemptNum: 2,
// note that here and everywhere else, time goes up two
// additional minutes because of the metrics calling
// sdk.NowTime twice
AttemptTime: time.Date(2020, 8, 19, 10, 23, 30, 0, time.UTC),
MaxAttempts: 3,
},
{
AttemptNum: 3,
AttemptTime: time.Date(2020, 8, 19, 10, 22, 30, 0, time.UTC),
AttemptTime: time.Date(2020, 8, 19, 10, 26, 30, 0, time.UTC),
MaxAttempts: 3,
},
},
Expand Down Expand Up @@ -369,7 +372,7 @@ func TestAttemptMiddleware(t *testing.T) {
},
{
AttemptNum: 2,
AttemptTime: time.Date(2020, 8, 19, 10, 21, 30, 0, time.UTC),
AttemptTime: time.Date(2020, 8, 19, 10, 23, 30, 0, time.UTC),
MaxAttempts: 3,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ public void writeAdditionalFiles(
SymbolProvider symbolProvider,
GoDelegator delegator
) {
delegator.useShapeWriter(settings.getService(model), this::generateRetryMiddlewareHelpers);
delegator.useShapeWriter(settings.getService(model), writer ->
generateRetryMiddlewareHelpers(writer, settings.getModuleName()));
}

private void generateRetryMiddlewareHelpers(GoWriter writer) {
private void generateRetryMiddlewareHelpers(GoWriter writer, String moduleName) {
writer
.addUseImports(SmithyGoDependency.SMITHY_MIDDLEWARE)
.addUseImports(SmithyGoDependency.SMITHY_HTTP_TRANSPORT)
Expand All @@ -47,6 +48,7 @@ private void generateRetryMiddlewareHelpers(GoWriter writer) {
func addRetry(stack *middleware.Stack, o Options) error {
attempt := retry.NewAttemptMiddleware(o.Retryer, smithyhttp.RequestCloner, func(m *retry.Attempt) {
m.LogAttempts = o.ClientLogMode.IsRetries()
m.OperationMeter = o.MeterProvider.Meter($S)
})
if err := stack.Finalize.Insert(attempt, "Signing", middleware.Before); err != nil {
return err
Expand All @@ -55,6 +57,6 @@ func addRetry(stack *middleware.Stack, o Options) error {
return err
}
return nil
}"""));
}""", moduleName));
}
}
Loading
Loading