From c6105238e84e10d1f0f4f8c001894e24f199f0f7 Mon Sep 17 00:00:00 2001 From: Mario Date: Wed, 14 Feb 2024 11:59:55 +0100 Subject: [PATCH] Fix trace context propagation in the read path (#3387) * Correctly propagate trace context between query-frontend and querier * Add tracing to distributed docker-compose example * Changelog --- CHANGELOG.md | 1 + .../docker-compose/distributed/agent.river | 34 +++++++++++ .../distributed/docker-compose.yaml | 56 ++++++++++++++++++- modules/frontend/v1/frontend.go | 9 +-- modules/querier/worker/frontend_processor.go | 12 ++++ 5 files changed, 105 insertions(+), 7 deletions(-) create mode 100644 example/docker-compose/distributed/agent.river diff --git a/CHANGELOG.md b/CHANGELOG.md index e9dc5d2c93a..7a683cdc1ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -60,6 +60,7 @@ * [BUGFIX] Correctly return 400 when max limit is requested on search. [#3340](https://github.com/grafana/tempo/pull/3340) (@joe-elliott) * [BUGFIX] Fix autocomplete filters sometimes returning erroneous results. [#3339](https://github.com/grafana/tempo/pull/3339) (@joe-elliott) * [CHANGE] **Breaking Change** Deprecating vParquet v1 [#3377](https://github.com/grafana/tempo/pull/3377) (@ie-pham) +* [BUGFIX] Fixes trace context propagation between query-frontend and querier. 
[#3387](https://github.com/grafana/tempo/pull/3387) (@mapno) ## v2.3.1 / 2023-11-28 diff --git a/example/docker-compose/distributed/agent.river b/example/docker-compose/distributed/agent.river new file mode 100644 index 00000000000..8ebef7a4093 --- /dev/null +++ b/example/docker-compose/distributed/agent.river @@ -0,0 +1,34 @@ +logging { + level = "info" + format = "logfmt" +} + +otelcol.processor.batch "batch" { + output { + traces = [otelcol.exporter.otlphttp.tempo.input] + } +} + +otelcol.receiver.jaeger "traces" { + protocols { + grpc {} + thrift_http {} + thrift_binary {} + thrift_compact {} + } + + output { + traces = [otelcol.processor.batch.batch.input] + } +} + +// Uses HTTP to send traces to Tempo. +// This makes it possible to skip TLS and to use basic auth. +otelcol.exporter.otlphttp "tempo" { + client { + endpoint = "http://distributor:4318" + tls { + insecure = true + } + } +} \ No newline at end of file diff --git a/example/docker-compose/distributed/docker-compose.yaml b/example/docker-compose/distributed/docker-compose.yaml index 59a66fab9f0..24256d8b992 100644 --- a/example/docker-compose/distributed/docker-compose.yaml +++ b/example/docker-compose/distributed/docker-compose.yaml @@ -9,6 +9,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 ingester-0: image: grafana/tempo:latest @@ -18,6 +23,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 ingester-1: image: grafana/tempo:latest @@ -27,6 +37,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - 
JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 ingester-2: image: grafana/tempo:latest @@ -36,6 +51,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 query-frontend: image: grafana/tempo:latest @@ -45,6 +65,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200:3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 querier: image: grafana/tempo:latest @@ -54,6 +79,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 compactor: image: grafana/tempo:latest @@ -63,6 +93,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 metrics-generator: image: grafana/tempo:latest @@ -72,6 +107,11 @@ services: - ./tempo-distributed.yaml:/etc/tempo.yaml ports: - "3200" # tempo +# Uncomment the following lines to enable tracing +# environment: +# - JAEGER_AGENT_HOST=agent +# - JAEGER_SAMPLER_TYPE=const +# - JAEGER_SAMPLER_PARAM=1 minio: image: minio/minio:latest @@ -86,7 +126,7 @@ services: - mkdir -p /data/tempo && minio server /data --console-address ':9001' k6-tracing: - image: ghcr.io/grafana/xk6-client-tracing:v0.0.2 + image: ghcr.io/grafana/xk6-client-tracing:latest environment: - ENDPOINT=distributor:4317 restart: always @@ -117,3 +157,17 @@ services: - GF_FEATURE_TOGGLES_ENABLE=traceqlEditor ports: - "3000:3000" + + agent: + image: grafana/agent:v0.38.1 + volumes: + - /tmp/agent:/tmp/agent 
+ - ./agent.river:/etc/agent.river + environment: + - AGENT_MODE=flow + entrypoint: + - sh + - -euc + - /bin/grafana-agent run --server.http.listen-addr=0.0.0.0:12345 /etc/agent.river + ports: + - "12345:12345" \ No newline at end of file diff --git a/modules/frontend/v1/frontend.go b/modules/frontend/v1/frontend.go index b8aa586b057..c65142d7a8d 100644 --- a/modules/frontend/v1/frontend.go +++ b/modules/frontend/v1/frontend.go @@ -13,6 +13,7 @@ import ( "github.com/grafana/dskit/httpgrpc" "github.com/grafana/dskit/services" "github.com/grafana/dskit/tenant" + "github.com/grafana/tempo/pkg/util/httpgrpcutil" "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -164,12 +165,8 @@ func (f *Frontend) RoundTripGRPC(ctx context.Context, req *httpgrpc.HTTPRequest) // Propagate trace context in gRPC too - this will be ignored if using HTTP. tracer, span := opentracing.GlobalTracer(), opentracing.SpanFromContext(ctx) if tracer != nil && span != nil { - // carrier := (*httpgrpcutil.HttpgrpcHeadersCarrier)(req) - carrier := make(opentracing.TextMapCarrier, len(req.Headers)) - for _, header := range req.Headers { - carrier.Set(header.Key, header.Values[0]) - } - err := tracer.Inject(span.Context(), opentracing.TextMap, carrier) + carrier := (*httpgrpcutil.HttpgrpcHeadersCarrier)(req) + err := tracer.Inject(span.Context(), opentracing.HTTPHeaders, carrier) if err != nil { return nil, err } diff --git a/modules/querier/worker/frontend_processor.go b/modules/querier/worker/frontend_processor.go index 2db639ece93..5e43ad618e2 100644 --- a/modules/querier/worker/frontend_processor.go +++ b/modules/querier/worker/frontend_processor.go @@ -13,6 +13,8 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/httpgrpc" + "github.com/grafana/tempo/pkg/util/httpgrpcutil" + "github.com/opentracing/opentracing-go" "google.golang.org/grpc" 
"github.com/grafana/tempo/modules/frontend/v1/frontendv1pb" @@ -146,6 +148,16 @@ func (fp *frontendProcessor) runRequests(ctx context.Context, requests []*httpgr } func (fp *frontendProcessor) runRequest(ctx context.Context, request *httpgrpc.HTTPRequest) *httpgrpc.HTTPResponse { + tracer := opentracing.GlobalTracer() + // Ignore errors here. If we cannot get the parent span, we just don't create a new one. + parentSpanContext, _ := httpgrpcutil.GetParentSpanForRequest(tracer, request) + if parentSpanContext != nil { + queueSpan, spanCtx := opentracing.StartSpanFromContextWithTracer(ctx, tracer, "querier_processor_runRequest", opentracing.ChildOf(parentSpanContext)) + defer queueSpan.Finish() + + ctx = spanCtx + } + response, err := fp.handler.Handle(ctx, request) if err != nil { var ok bool