Skip to content

Commit

Permalink
extend metrics with retry-related instruments (#2789)
Browse files Browse the repository at this point in the history
* extend metrics with retry-related instruments

* fix sdk.NowTime test shenanigans

* regen s3
  • Loading branch information
lucix-aws committed Sep 19, 2024
1 parent 977daad commit c595cfb
Show file tree
Hide file tree
Showing 14 changed files with 752 additions and 21 deletions.
51 changes: 51 additions & 0 deletions aws/retry/attempt_metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
package retry

import (
"context"

"github.com/aws/smithy-go/metrics"
"github.com/aws/smithy-go/middleware"
)

type attemptMetrics struct {
Attempts metrics.Int64Counter
Errors metrics.Int64Counter

AttemptDuration metrics.Float64Histogram
}

func newAttemptMetrics(meter metrics.Meter) (*attemptMetrics, error) {
m := &attemptMetrics{}
var err error

m.Attempts, err = meter.Int64Counter("client.call.attempts", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "{attempt}"
o.Description = "The number of attempts for an individual operation"
})
if err != nil {
return nil, err
}
m.Errors, err = meter.Int64Counter("client.call.errors", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "{error}"
o.Description = "The number of errors for an operation"
})
if err != nil {
return nil, err
}
m.AttemptDuration, err = meter.Float64Histogram("client.call.attempt_duration", func(o *metrics.InstrumentOptions) {
o.UnitLabel = "s"
o.Description = "The time it takes to connect to the service, send the request, and get back HTTP status code and headers (including time queued waiting to be sent)"
})
if err != nil {
return nil, err
}

return m, nil
}

func withOperationMetadata(ctx context.Context) metrics.RecordMetricOption {
return func(o *metrics.RecordMetricOptions) {
o.Properties.Set("rpc.service", middleware.GetServiceID(ctx))
o.Properties.Set("rpc.method", middleware.GetOperationName(ctx))
}
}
40 changes: 38 additions & 2 deletions aws/retry/middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import (
"strings"
"time"

"github.com/aws/aws-sdk-go-v2/aws/middleware/private/metrics"
privatemetrics "github.com/aws/aws-sdk-go-v2/aws/middleware/private/metrics"
internalcontext "github.com/aws/aws-sdk-go-v2/internal/context"

"github.com/aws/aws-sdk-go-v2/aws"
awsmiddle "github.com/aws/aws-sdk-go-v2/aws/middleware"
awshttp "github.com/aws/aws-sdk-go-v2/aws/transport/http"
"github.com/aws/aws-sdk-go-v2/internal/sdk"
"github.com/aws/smithy-go/logging"
"github.com/aws/smithy-go/metrics"
smithymiddle "github.com/aws/smithy-go/middleware"
"github.com/aws/smithy-go/tracing"
"github.com/aws/smithy-go/transport/http"
Expand All @@ -39,6 +41,9 @@ type Attempt struct {
// attempts are reached.
LogAttempts bool

// A Meter instance for recording retry-related metrics.
OperationMeter metrics.Meter

retryer aws.RetryerV2
requestCloner RequestCloner
}
Expand All @@ -56,6 +61,10 @@ func NewAttemptMiddleware(retryer aws.Retryer, requestCloner RequestCloner, optF
for _, fn := range optFns {
fn(m)
}
if m.OperationMeter == nil {
m.OperationMeter = metrics.NopMeterProvider{}.Meter("")
}

return m
}

Expand All @@ -81,6 +90,11 @@ func (r *Attempt) HandleFinalize(ctx context.Context, in smithymiddle.FinalizeIn
maxAttempts := r.retryer.MaxAttempts()
releaseRetryToken := nopRelease

retryMetrics, err := newAttemptMetrics(r.OperationMeter)
if err != nil {
return out, metadata, err
}

for {
attemptNum++
attemptInput := in
Expand All @@ -102,7 +116,19 @@ func (r *Attempt) HandleFinalize(ctx context.Context, in smithymiddle.FinalizeIn
attemptCtx, span := tracing.StartSpan(attemptCtx, "Attempt", func(o *tracing.SpanOptions) {
o.Properties.Set("operation.attempt", attemptNum)
})
retryMetrics.Attempts.Add(ctx, 1, withOperationMetadata(ctx))

start := sdk.NowTime()
out, attemptResult, releaseRetryToken, err = r.handleAttempt(attemptCtx, attemptInput, releaseRetryToken, next)
elapsed := sdk.NowTime().Sub(start)

retryMetrics.AttemptDuration.Record(ctx, float64(elapsed)/1e9, withOperationMetadata(ctx))
if err != nil {
retryMetrics.Errors.Add(ctx, 1, withOperationMetadata(ctx), func(o *metrics.RecordMetricOptions) {
o.Properties.Set("exception.type", errorType(err))
})
}

span.End()

attemptClockSkew, _ = awsmiddle.GetAttemptSkew(attemptResult.ResponseMetadata)
Expand Down Expand Up @@ -245,7 +271,7 @@ func (r *Attempt) handleAttempt(
// that time. Potentially early exist if the sleep is canceled via the
// context.
retryDelay, reqErr := r.retryer.RetryDelay(attemptNum, err)
mctx := metrics.Context(ctx)
mctx := privatemetrics.Context(ctx)
if mctx != nil {
attempt, err := mctx.Data().LatestAttempt()
if err != nil {
Expand Down Expand Up @@ -388,3 +414,13 @@ func AddRetryMiddlewares(stack *smithymiddle.Stack, options AddRetryMiddlewaresO
}
return nil
}

// deduces the modeled exception type from an attempt error, which is almost
// always wrapped
func errorType(err error) string {
var terr *awshttp.ResponseError
if errors.As(err, &terr) {
return fmt.Sprintf("%T", terr.Err)
}
return fmt.Sprintf("%T", err)
}
11 changes: 7 additions & 4 deletions aws/retry/middleware_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,16 @@ func TestAttemptMiddleware(t *testing.T) {
MaxAttempts: 3,
},
{
AttemptNum: 2,
AttemptTime: time.Date(2020, 8, 19, 10, 21, 30, 0, time.UTC),
AttemptNum: 2,
// note that here and everywhere else, time goes up two
// additional minutes because of the metrics calling
// sdk.NowTime twice
AttemptTime: time.Date(2020, 8, 19, 10, 23, 30, 0, time.UTC),
MaxAttempts: 3,
},
{
AttemptNum: 3,
AttemptTime: time.Date(2020, 8, 19, 10, 22, 30, 0, time.UTC),
AttemptTime: time.Date(2020, 8, 19, 10, 26, 30, 0, time.UTC),
MaxAttempts: 3,
},
},
Expand Down Expand Up @@ -369,7 +372,7 @@ func TestAttemptMiddleware(t *testing.T) {
},
{
AttemptNum: 2,
AttemptTime: time.Date(2020, 8, 19, 10, 21, 30, 0, time.UTC),
AttemptTime: time.Date(2020, 8, 19, 10, 23, 30, 0, time.UTC),
MaxAttempts: 3,
},
},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ public void writeAdditionalFiles(
SymbolProvider symbolProvider,
GoDelegator delegator
) {
delegator.useShapeWriter(settings.getService(model), this::generateRetryMiddlewareHelpers);
delegator.useShapeWriter(settings.getService(model), writer ->
generateRetryMiddlewareHelpers(writer, settings.getModuleName()));
}

private void generateRetryMiddlewareHelpers(GoWriter writer) {
private void generateRetryMiddlewareHelpers(GoWriter writer, String moduleName) {
writer
.addUseImports(SmithyGoDependency.SMITHY_MIDDLEWARE)
.addUseImports(SmithyGoDependency.SMITHY_HTTP_TRANSPORT)
Expand All @@ -47,6 +48,7 @@ private void generateRetryMiddlewareHelpers(GoWriter writer) {
func addRetry(stack *middleware.Stack, o Options) error {
attempt := retry.NewAttemptMiddleware(o.Retryer, smithyhttp.RequestCloner, func(m *retry.Attempt) {
m.LogAttempts = o.ClientLogMode.IsRetries()
m.OperationMeter = o.MeterProvider.Meter($S)
})
if err := stack.Finalize.Insert(attempt, "Signing", middleware.Before); err != nil {
return err
Expand All @@ -55,6 +57,6 @@ func addRetry(stack *middleware.Stack, o Options) error {
return err
}
return nil
}"""));
}""", moduleName));
}
}
Loading

0 comments on commit c595cfb

Please sign in to comment.