diff --git a/README.md b/README.md index df93c884..1ca03a2e 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ An example of [graphql-spring-boot](https://www.graphql-java-kickstart.com/sprin ## Getting started -### Dependency management +### Dependency management with Gradle Make sure JCenter is among your repositories: @@ -56,3 +56,34 @@ you will also need to provide: A minimal but complete example is available in [InventorySchemaProvider](spring-example/src/main/java/com/apollographql/federation/springexample/InventorySchemaProvider.java). + +### Federated tracing + +To make your server generate performance traces and return them along with +responses to the Apollo Gateway (which then can send them to Apollo Graph +Manager), install the `FederatedTracingInstrumentation` into your `GraphQL` +object: + +```java +GraphQL graphql = GraphQL.newGraphQL(graphQLSchema) + .instrumentation(new FederatedTracingInstrumentation()) + .build() +``` + +It is generally desired to only create traces for requests that actually come +from Apollo Gateway, as they aren't helpful if you're connecting directly to +your backend service for testing. In order for `FederatedTracingInstrumentation` +to know if the request is coming from Gateway, you need to give it access to the +HTTP request's headers, by making the `context` part of your `ExecutionInput` +implement the `HTTPRequestHeaders` interface. 
For example: + +```java + HTTPRequestHeaders context = new HTTPRequestHeaders() { + @Override + public @Nullable String getHTTPRequestHeader(String caseInsensitiveHeaderName) { + return myIncomingHTTPRequest.getHeader(caseInsensitiveHeaderName); + } + } + graphql.execute(ExecutionInput.newExecutionInput(queryString).context(context)); + +``` diff --git a/graphql-java-support/pom.xml b/graphql-java-support/pom.xml index e5922dfa..2d0b150c 100644 --- a/graphql-java-support/pom.xml +++ b/graphql-java-support/pom.xml @@ -30,9 +30,21 @@ org.junit.jupiter junit-jupiter-engine + + com.google.protobuf + protobuf-java + 3.9.0 + + + + kr.motd.maven + os-maven-plugin + 1.6.0 + + org.apache.maven.plugins @@ -54,6 +66,23 @@ org.codehaus.mojo cobertura-maven-plugin + + + org.xolstice.maven.plugins + protobuf-maven-plugin + 0.6.1 + + com.google.protobuf:protoc:3.9.0:exe:${os.detected.classifier} + + + + + compile + test-compile + + + + diff --git a/graphql-java-support/src/main/java/com/apollographql/federation/graphqljava/tracing/FederatedTracingInstrumentation.java b/graphql-java-support/src/main/java/com/apollographql/federation/graphqljava/tracing/FederatedTracingInstrumentation.java new file mode 100644 index 00000000..a11dff48 --- /dev/null +++ b/graphql-java-support/src/main/java/com/apollographql/federation/graphqljava/tracing/FederatedTracingInstrumentation.java @@ -0,0 +1,329 @@ +package com.apollographql.federation.graphqljava.tracing; + +import com.google.protobuf.Timestamp; +import graphql.ExecutionResult; +import graphql.ExecutionResultImpl; +import graphql.GraphQLError; +import graphql.GraphqlErrorBuilder; +import graphql.execution.DataFetcherResult; +import graphql.execution.ExecutionPath; +import graphql.execution.ExecutionStepInfo; +import graphql.execution.instrumentation.InstrumentationContext; +import graphql.execution.instrumentation.InstrumentationState; +import graphql.execution.instrumentation.SimpleInstrumentation; +import 
graphql.execution.instrumentation.parameters.InstrumentationCreateStateParameters; +import graphql.execution.instrumentation.parameters.InstrumentationExecutionParameters; +import graphql.execution.instrumentation.parameters.InstrumentationFieldFetchParameters; +import graphql.execution.instrumentation.parameters.InstrumentationValidationParameters; +import graphql.language.Document; +import graphql.language.SourceLocation; +import graphql.parser.InvalidSyntaxException; +import graphql.validation.ValidationError; +import mdg.engine.proto.Reports; +import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.Base64; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.concurrent.CompletableFuture; + +import static graphql.execution.instrumentation.SimpleInstrumentationContext.whenCompleted; +import static graphql.schema.GraphQLTypeUtil.simplePrint; + +public class FederatedTracingInstrumentation extends SimpleInstrumentation { + private static final String EXTENSION_KEY = "ftv1"; + private static final String HEADER_NAME = "apollo-federation-include-trace"; + private static final String HEADER_VALUE = "ftv1"; + + private final Options options; + + private static final Logger logger = LoggerFactory.getLogger(FederatedTracingInstrumentation.class); + + public FederatedTracingInstrumentation() { + this.options = Options.newOptions(); + } + + public FederatedTracingInstrumentation(Options options) { + this.options = options; + } + + @Override + public InstrumentationState createState(InstrumentationCreateStateParameters parameters) { + // If we've been configured with a way of reading HTTP headers, we should only be active + // if the special HTTP header has the special value. 
If the header isn't provided or has + // a different value, return null for our state, which we'll interpret in the rest of this + // file as meaning "don't instrument". (If we haven't been given access to HTTP headers, + // always instrument.) + Object context = parameters.getExecutionInput().getContext(); + if (context instanceof HTTPRequestHeaders) { + @Nullable String headerValue = ((HTTPRequestHeaders) context).getHTTPRequestHeader(HEADER_NAME); + if (headerValue == null || !headerValue.equals(HEADER_VALUE)) { + return null; + } + } + return new FederatedTracingState(); + } + + @Override + public CompletableFuture instrumentExecutionResult(ExecutionResult executionResult, InstrumentationExecutionParameters parameters) { + final @Nullable FederatedTracingState state = parameters.getInstrumentationState(); + if (state == null) { + return super.instrumentExecutionResult(executionResult, parameters); + } + + Reports.Trace trace = state.toProto(); + + if (options.isDebuggingEnabled()) { + logger.debug(trace.toString()); + } + + // Elaborately copy the result into a builder. + // Annoyingly, ExecutionResultImpl.Builder.from takes ExecutionResultImpl rather than + // ExecutionResult in versions of GraphQL-Java older than v13 + // (see https://github.com/graphql-java/graphql-java/pull/1491), so to support older versions + // we copy the fields by hand, which does result in isDataPresent always being set (ie, + // "data": null being included in all results). The built-in TracingInstrumentation has + // the same issue. If we decide to only support v13 then this can just change to + // ExecutionResultImpl.newExecutionResult().from(executionResult). 
+ return CompletableFuture.completedFuture(ExecutionResultImpl.newExecutionResult() + .data(executionResult.getData()) + .errors(executionResult.getErrors()) + .extensions(executionResult.getExtensions()) + .addExtension(EXTENSION_KEY, Base64.getEncoder().encodeToString(trace.toByteArray())) + .build()); + } + + @Override + public InstrumentationContext beginFieldFetch(InstrumentationFieldFetchParameters parameters) { + FederatedTracingState state = parameters.getInstrumentationState(); + if (state == null) { + return super.beginFieldFetch(parameters); + } + + SourceLocation fieldLocation = parameters.getEnvironment().getField().getSourceLocation(); + + long startNanos = System.nanoTime(); + return whenCompleted((result, throwable) -> { + long endNanos = System.nanoTime(); + + ExecutionStepInfo executionStepInfo = parameters.getEnvironment().getExecutionStepInfo(); + state.addNode( + executionStepInfo, + // relative to the trace's start_time, in ns + startNanos - state.getStartRequestNanos(), + // relative to the trace's start_time, in ns + endNanos - state.getStartRequestNanos(), + convertErrors(throwable, result), + fieldLocation + ); + }); + } + + @Override + public InstrumentationContext beginParse(InstrumentationExecutionParameters parameters) { + FederatedTracingState state = parameters.getInstrumentationState(); + if (state == null) { + return super.beginParse(parameters); + } + + return whenCompleted((document, throwable) -> { + for (GraphQLError error : convertErrors(throwable, null)) { + state.addRootError(error); + } + }); + } + + @Override + public InstrumentationContext> beginValidation(InstrumentationValidationParameters parameters) { + FederatedTracingState state = parameters.getInstrumentationState(); + if (state == null) { + return super.beginValidation(parameters); + } + + return whenCompleted((validationErrors, throwable) -> { + for (GraphQLError error : convertErrors(throwable, null)) { + state.addRootError(error); + } + + for (ValidationError 
error : validationErrors) { + state.addRootError(error); + } + }); + } + + // Field resolvers can throw exceptions or add errors to the DataFetchingResult. This method normalizes them to a + // single list of GraphQLErrors. + @NotNull + private List convertErrors(Throwable throwable, Object result) { + ArrayList graphQLErrors = new ArrayList<>(); + + if (throwable != null) { + if (throwable instanceof GraphQLError) { + graphQLErrors.add((GraphQLError) throwable); + } else { + GraphqlErrorBuilder errorBuilder = GraphqlErrorBuilder.newError() + .message(throwable.getMessage()); + + if (throwable instanceof InvalidSyntaxException) { + errorBuilder.location(((InvalidSyntaxException) throwable).getLocation()); + } + + graphQLErrors.add(errorBuilder.build()); + } + } + + if (result instanceof DataFetcherResult) { + DataFetcherResult theResult = (DataFetcherResult) result; + if (theResult.hasErrors()) { + graphQLErrors.addAll(theResult.getErrors()); + } + } + + return graphQLErrors; + } + + /** + * Stores timing information and a map of nodes for field information. + */ + private static class FederatedTracingState implements InstrumentationState { + private final Instant startRequestTime; + private final long startRequestNanos; + private final LinkedHashMap nodesByPath; + + private FederatedTracingState() { + // record start time when creating instrumentation state for a request + startRequestTime = Instant.now(); + startRequestNanos = System.nanoTime(); + + nodesByPath = new LinkedHashMap<>(); + nodesByPath.put(ExecutionPath.rootPath(), Reports.Trace.Node.newBuilder()); + } + + @NotNull + Reports.Trace toProto() { + return Reports.Trace.newBuilder() + .setStartTime(getStartTimestamp()) + .setEndTime(getNowTimestamp()) + .setDurationNs(getDuration()) + .setRoot(nodesByPath.get(ExecutionPath.rootPath())) + .build(); + } + + /** + * Adds node to nodesByPath and recursively ensures that all parent nodes exist. 
+ */ + void addNode(ExecutionStepInfo stepInfo, long startFieldNanos, long endFieldNanos, List errors, SourceLocation fieldLocation) { + ExecutionPath path = stepInfo.getPath(); + Reports.Trace.Node.Builder parent = getParentNode(path); + + Reports.Trace.Node.Builder node = parent.addChildBuilder() + .setStartTime(startFieldNanos) + .setEndTime(endFieldNanos) + .setParentType(simplePrint(stepInfo.getParent().getUnwrappedNonNullType())) + .setType(stepInfo.simplePrint()) + .setResponseName(stepInfo.getResultKey()); + + String originalFieldName = stepInfo.getField().getName(); + + // set originalFieldName only when a field alias was used + if (!originalFieldName.equals(stepInfo.getResultKey())) { + node.setOriginalFieldName(originalFieldName); + } + + errors.forEach(error -> { + Reports.Trace.Error.Builder builder = node.addErrorBuilder().setMessage(error.getMessage()); + if (error.getLocations().isEmpty() && fieldLocation != null) { + builder.addLocationBuilder() + .setColumn(fieldLocation.getColumn()) + .setLine(fieldLocation.getLine()); + } else { + error.getLocations().forEach(location -> builder.addLocationBuilder() + .setColumn(location.getColumn()) + .setLine(location.getLine())); + } + }); + + nodesByPath.put(path, node); + } + + @NotNull + Reports.Trace.Node.Builder getParentNode(ExecutionPath path) { + List pathParts = path.toList(); + return nodesByPath.computeIfAbsent(ExecutionPath.fromList(pathParts.subList(0, pathParts.size() - 1)), parentPath -> { + if (parentPath.equals(ExecutionPath.rootPath())) { + // The root path is inserted at construction time, so this shouldn't happen. + throw new RuntimeException("root path missing from nodesByPath?"); + } + + // Recursively get the grandparent node and start building the parent node. 
+ Reports.Trace.Node.Builder missingParent = getParentNode(parentPath).addChildBuilder(); + + // If the parent was a field name, then its fetcher would have been called before + // the fetcher for 'path' and it would be in nodesByPath. So the parent must be + // a list index. Note that we subtract 2 here because we want the last part of + // parentPath, not path. + Object parentLastPathPart = pathParts.get(pathParts.size() - 2); + if (!(parentLastPathPart instanceof Integer)) { + throw new RuntimeException("Unexpected missing non-index " + parentLastPathPart); + } + missingParent.setIndex((Integer) parentLastPathPart); + return missingParent; + }); + } + + void addRootError(GraphQLError error) { + Reports.Trace.Error.Builder builder = nodesByPath.get(ExecutionPath.rootPath()).addErrorBuilder() + .setMessage(error.getMessage()); + + error.getLocations().forEach(location -> builder.addLocationBuilder() + .setColumn(location.getColumn()) + .setLine(location.getLine())); + } + + long getStartRequestNanos() { + return startRequestNanos; + } + + @NotNull + private static Timestamp instantToTimestamp(@NotNull Instant startRequestTime2) { + return Timestamp.newBuilder() + .setSeconds(startRequestTime2.getEpochSecond()) + .setNanos(startRequestTime2.getNano()).build(); + } + + @NotNull + private Timestamp getStartTimestamp() { + return instantToTimestamp(startRequestTime); + } + + @NotNull + private Timestamp getNowTimestamp() { + return instantToTimestamp(Instant.now()); + } + + private long getDuration() { + return System.nanoTime() - startRequestNanos; + } + } + + public static class Options { + private final boolean debuggingEnabled; + + public Options(boolean debuggingEnabled) { + this.debuggingEnabled = debuggingEnabled; + } + + public static @NotNull Options newOptions() { + return new Options(false); + } + + public boolean isDebuggingEnabled() { + return debuggingEnabled; + } + } +} diff --git 
a/graphql-java-support/src/main/java/com/apollographql/federation/graphqljava/tracing/HTTPRequestHeaders.java b/graphql-java-support/src/main/java/com/apollographql/federation/graphqljava/tracing/HTTPRequestHeaders.java new file mode 100644 index 00000000..92fb563d --- /dev/null +++ b/graphql-java-support/src/main/java/com/apollographql/federation/graphqljava/tracing/HTTPRequestHeaders.java @@ -0,0 +1,20 @@ +package com.apollographql.federation.graphqljava.tracing; + +import org.jetbrains.annotations.Nullable; + +/** + * If the context object on your GraphQL ExecutionInput implements this interface, + * FederatedTracingInstrumentation will generate traces only when requested to + * by the gateway in front of it. + */ +public interface HTTPRequestHeaders { + /** + * Return the value of the given HTTP header from the request, or null if the + * header is not provided. The header name should be treated as case-insensitive. + * If the header is provided multiple times, choose the first one. + * + * @param caseInsensitiveHeaderName the HTTP header name to get + * @return the header, or null if not provided + */ + @Nullable String getHTTPRequestHeader(String caseInsensitiveHeaderName); +} diff --git a/graphql-java-support/src/main/proto/reports.proto b/graphql-java-support/src/main/proto/reports.proto new file mode 100644 index 00000000..5fb1ba8b --- /dev/null +++ b/graphql-java-support/src/main/proto/reports.proto @@ -0,0 +1,492 @@ +// This file is copied over from the `apollo-engine-reporting` package, which, in turn, derives its information from +// Apollo's cloud infrastructure. It is expected that as the protobuf definition changes within the underlying system +// we are reporting to, these libraries will need to be updated in sync. 
+// https://github.com/apollographql/apollo-server/blob/master/packages/apollo-engine-reporting-protobuf/src/reports.proto +syntax = "proto3"; + +package mdg.engine.proto; + +import "google/protobuf/timestamp.proto"; + +message Trace { + message CachePolicy { + enum Scope { + UNKNOWN = 0; + PUBLIC = 1; + PRIVATE = 2; + } + + Scope scope = 1; + int64 max_age_ns = 2; // use 0 for absent, -1 for 0 + } + + message Details { + // The variables associated with this query (unless the reporting agent is + // configured to keep them all private). Values are JSON: ie, strings are + // enclosed in double quotes, etc. The value of a private variable is + // the empty string. + map<string, string> variables_json = 4; + // Deprecated. Engineproxy did not encode variable values as JSON, so you + // couldn't tell numbers from numeric strings. Send variables_json instead. + map<string, string> variables = 1; + // Optional: this is the original full query before the signature algorithm + // is applied. Engineproxy always sent this in all traces, which meant that + // literal-masking done by the signature algorithm didn't fully hide + // sensitive data from Engine servers. apollo-engine-reporting does not + // include this by default. (The Engine frontend does not currently show + // this field.) + string raw_query = 2; + // Don't include this in traces inside a FullTracesReport; the operation + // name for these traces comes from the key of the traces_per_query map. 
+ string operation_name = 3; + } + + message Error { + string message = 1; // required + repeated Location location = 2; + uint64 time_ns = 3; + string json = 4; + } + + message HTTP { + message Values { + repeated string value = 1; + } + + enum Method { + UNKNOWN = 0; + OPTIONS = 1; + GET = 2; + HEAD = 3; + POST = 4; + PUT = 5; + DELETE = 6; + TRACE = 7; + CONNECT = 8; + PATCH = 9; + } + Method method = 1; + string host = 2; + string path = 3; + + // Should exclude manual blacklist ("Auth" by default) + map request_headers = 4; + map response_headers = 5; + + uint32 status_code = 6; + + bool secure = 8; // TLS was used + string protocol = 9; // by convention "HTTP/1.0", "HTTP/1.1", "HTTP/2" or "h2" + } + + message Location { + uint32 line = 1; + uint32 column = 2; + } + + // We store information on each resolver execution as a Node on a tree. + // The structure of the tree corresponds to the structure of the GraphQL + // response; it does not indicate the order in which resolvers were + // invoked. Note that nodes representing indexes (and the root node) + // don't contain all Node fields (eg types and times). + message Node { + // The name of the field (for Nodes representing a resolver call) or the + // index in a list (for intermediate Nodes representing elements of a list). + // field_name is the name of the field as it appears in the GraphQL + // response: ie, it may be an alias. (In that case, the original_field_name + // field holds the actual field name from the schema.) In any context where + // we're building up a path, we use the response_name rather than the + // original_field_name. + oneof id { + string response_name = 1; + uint32 index = 2; + } + + string original_field_name = 14; + + // The field's return type; e.g. "String!" for User.email:String! + string type = 3; + + // The field's parent type; e.g. "User" for User.email:String! 
+ string parent_type = 13; + + CachePolicy cache_policy = 5; + + // relative to the trace's start_time, in ns + uint64 start_time = 8; + // relative to the trace's start_time, in ns + uint64 end_time = 9; + + repeated Error error = 11; + repeated Node child = 12; + + reserved 4; + } + + // represents a node in the query plan, under which there is a trace tree for that service fetch. + // In particular, each fetch node represents a call to an implementing service, and calls to implementing + // services may not be unique. See https://github.com/apollographql/apollo-server/blob/master/packages/apollo-gateway/src/QueryPlan.ts + // for more information and details. + message QueryPlanNode { + // This represents a set of nodes to be executed sequentially by the Gateway executor + message SequenceNode { + repeated QueryPlanNode nodes = 1; + } + // This represents a set of nodes to be executed in parallel by the Gateway executor + message ParallelNode { + repeated QueryPlanNode nodes = 1; + } + // This represents a node to send an operation to an implementing service + message FetchNode { + // XXX When we want to include more details about the sub-operation that was + // executed against this service, we should include that here in each fetch node. + // This might include an operation signature, requires directive, reference resolutions, etc. + string serviceName = 1; + + bool traceParsingFailed = 2; + + // This Trace only contains start_time, end_time, duration_ns, and root; + // all timings were calculated **on the federated service**, and clock skew + // will be handled by the ingress server. + Trace trace = 3; + + // relative to the outer trace's start_time, in ns, measured in the gateway. + uint64 sent_time_offset = 4; + + // Wallclock times measured in the gateway for when this operation was + // sent and received. 
+ google.protobuf.Timestamp sent_time = 5; + google.protobuf.Timestamp received_time = 6; + } + + // This node represents a way to reach into the response path and attach related entities. + // XXX Flatten is really not the right name and this node may be renamed in the query planner. + message FlattenNode { + repeated ResponsePathElement response_path = 1; + QueryPlanNode node = 2; + } + message ResponsePathElement { + oneof id { + string field_name = 1; + uint32 index = 2; + } + } + oneof node { + SequenceNode sequence = 1; + ParallelNode parallel = 2; + FetchNode fetch = 3; + FlattenNode flatten = 4; + } + } + + // Wallclock time when the trace began. + google.protobuf.Timestamp start_time = 4; // required + // Wallclock time when the trace ended. + google.protobuf.Timestamp end_time = 3; // required + // High precision duration of the trace; may not equal end_time-start_time + // (eg, if your machine's clock changed during the trace). + uint64 duration_ns = 11; // required + // A tree containing information about all resolvers run directly by this + // service, including errors. + Node root = 14; + + // ------------------------------------------------------------------------- + // Fields below this line are *not* included in federated traces (the traces + // sent from federated services to the gateway). + + // In addition to details.raw_query, we include a "signature" of the query, + // which can be normalized: for example, you may want to discard aliases, drop + // unused operations and fragments, sort fields, etc. The most important thing + // here is that the signature match the signature in StatsReports. In + // StatsReports signatures show up as the key in the per_query map (with the + // operation name prepended). The signature should be a valid GraphQL query. + // All traces must have a signature; if this Trace is in a FullTracesReport + // that signature is in the key of traces_per_query rather than in this field. 
+ // Engineproxy provides the signature in legacy_signature_needs_resigning + // instead. + string signature = 19; + + Details details = 6; + + // Note: engineproxy always sets client_name, client_version, and client_address to "none". + // apollo-engine-reporting allows for them to be set by the user. + string client_name = 7; + string client_version = 8; + string client_address = 9; + string client_reference_id = 23; + + HTTP http = 10; + + CachePolicy cache_policy = 18; + + // If this Trace was created by a gateway, this is the query plan, including + // sub-Traces for federated services. Note that the 'root' tree on the + // top-level Trace won't contain any resolvers (though it could contain errors + // that occurred in the gateway itself). + QueryPlanNode query_plan = 26; + + // Was this response served from a full query response cache? (In that case + // the node tree will have no resolvers.) + bool full_query_cache_hit = 20; + + // Was this query specified successfully as a persisted query hash? + bool persisted_query_hit = 21; + // Did this query contain both a full query string and a persisted query hash? + // (This typically means that a previous request was rejected as an unknown + // persisted query.) + bool persisted_query_register = 22; + + // Was this operation registered and a part of the safelist? + bool registered_operation = 24; + + // Was this operation forbidden due to lack of safelisting? + bool forbidden_operation = 25; + + // -------------------------------------------------------------- + // Fields below this line are only set by the old Go engineproxy. + google.protobuf.Timestamp origin_reported_start_time = 15; + google.protobuf.Timestamp origin_reported_end_time = 16; + uint64 origin_reported_duration_ns = 17; + + // Older agents (eg the Go engineproxy) relied to some degree on the Engine + // backend to run their own semi-compatible implementation of a specific + // variant of query signatures. 
The backend does not do this for new agents (which + // set the above 'signature' field). It used to still "re-sign" signatures + // from engineproxy, but we've now simplified the backend to no longer do this. + // Deprecated and ignored in FullTracesReports. + string legacy_signature_needs_resigning = 5; + + + // removed: Node parse = 12; Node validate = 13; + // Id128 server_id = 1; Id128 client_id = 2; + reserved 12, 13, 1, 2; +} + +// The `service` value embedded within the header key is not guaranteed to contain an actual service, +// and, in most cases, the service information is trusted to come from upstream processing. If the +// service _is_ specified in this header, then it is checked to match the context that is reporting it. +// Otherwise, the service information is deduced from the token context of the reporter and then sent +// along via other mechanisms (in Kafka, the `ReportKafkaKey). The other information (hostname, +// agent_version, etc.) is sent by the Apollo Engine Reporting agent, but we do not currently save that +// information to any of our persistent storage. +message ReportHeader { + string service = 3; + // eg "host-01.example.com" + string hostname = 5; + + // eg "engineproxy 0.1.0" + string agent_version = 6; // required + // eg "prod-4279-20160804T065423Z-5-g3cf0aa8" (taken from `git describe --tags`) + string service_version = 7; + // eg "node v4.6.0" + string runtime_version = 8; + // eg "Linux box 4.6.5-1-ec2 #1 SMP Mon Aug 1 02:31:38 PDT 2016 x86_64 GNU/Linux" + string uname = 9; + // eg "current", "prod" + string schema_tag = 10; + // The hex representation of the sha512 of the introspection response + string schema_hash = 11; +} + +message PathErrorStats { + map children = 1; + uint64 errors_count = 4; + uint64 requests_with_errors_count = 5; +} + +message ClientNameStats { + // Duration histogram for non-cache-hit queries. + // (See docs/histograms.md for the histogram format.) 
+ repeated int64 latency_count = 1; + reserved 2; // removed: repeated uint64 error_count = 2; + // These per-version fields were used to understand what versions contributed to this sample + // when we were implementing the aggregation of this information ourselves using BigTable. + // However, since the per-version stats don't separate out latency, it makes more sense to + // have stats reported with contextual information so we can have the specific breakdown we're + // looking for. These fields are somewhat misleading as we never actually do any per-version + // awareness with anything reporting in the legacy "per_client_name" stats, and instead use + // "query_stats_with_context" to have more contextual information. + map requests_count_per_version = 3; // required + map cache_hits_per_version = 4; + map persisted_query_hits_per_version = 10; + map persisted_query_misses_per_version = 11; + map registered_operation_count_per_version = 12; + map forbidden_operation_count_per_version = 13; + repeated int64 cache_latency_count = 5; // Duration histogram; see docs/histograms.md + PathErrorStats root_error_stats = 6; + uint64 requests_with_errors_count = 7; + // TTL histograms for cache misses for the public cache. + repeated int64 public_cache_ttl_count = 8; + // TTL histograms for cache misses for the private cache. 
+  repeated int64 private_cache_ttl_count = 9;
+}
+
+message QueryLatencyStats {
+  repeated int64 latency_count = 1;
+  uint64 request_count = 2;
+  uint64 cache_hits = 3;
+  uint64 persisted_query_hits = 4;
+  uint64 persisted_query_misses = 5;
+  repeated int64 cache_latency_count = 6;
+  PathErrorStats root_error_stats = 7;
+  uint64 requests_with_errors_count = 8;
+  repeated int64 public_cache_ttl_count = 9;
+  repeated int64 private_cache_ttl_count = 10;
+  uint64 registered_operation_count = 11;
+  uint64 forbidden_operation_count = 12;
+}
+
+message StatsContext {
+  string client_reference_id = 1;
+  string client_name = 2;
+  string client_version = 3;
+}
+
+message ContextualizedQueryLatencyStats {
+  QueryLatencyStats query_latency_stats = 1;
+  StatsContext context = 2;
+}
+
+message ContextualizedTypeStats {
+  StatsContext context = 1;
+  map<string, TypeStat> per_type_stat = 2;
+}
+
+message FieldStat {
+  string name = 2; // deprecated; only set when stored in TypeStat.field
+  string return_type = 3; // required; eg "String!" for User.email:String!
+  uint64 errors_count = 4;
+  uint64 count = 5;
+  uint64 requests_with_errors_count = 6;
+  repeated int64 latency_count = 8; // Duration histogram; see docs/histograms.md
+}
+
+message TypeStat {
+  string name = 1; // deprecated; only set when stored in QueryStats.per_type
+  repeated FieldStat field = 2; // deprecated; use per_field_stat instead
+  // Key is (eg) "email" for User.email:String!
+  map<string, FieldStat> per_field_stat = 3;
+}
+
+message QueryStats {
+  // Either per_client_name (for back-compat) or query_stats_with_context must be specified. If both are
+  // specified, then query_stats_with_context will be used and per_client_name will be ignored. Although
+  // the fields in ClientNameStats mention things "per-version," the information in the "per-version"
+  // fields will only ever be over the default version, the empty String: "", if arrived at via the
+  // FullTracesAggregator.
+  map<string, ClientNameStats> per_client_name = 1; // deprecated; use stats_with_context instead
+  repeated ContextualizedQueryLatencyStats query_stats_with_context = 4;
+  repeated TypeStat per_type = 2; // deprecated; use type_stats_with_context instead
+  // Key is the parent type, e.g. "User" for User.email:String!
+  map<string, TypeStat> per_type_stat = 3; // deprecated; use type_stats_with_context instead
+  repeated ContextualizedTypeStats type_stats_with_context = 5;
+}
+
+// Top-level message type for the server-side traces endpoint
+message TracesReport {
+  ReportHeader header = 1; // required
+  repeated Trace trace = 2; // required
+}
+
+message Field {
+  string name = 2; // required; eg "email" for User.email:String!
+  string return_type = 3; // required; eg "String!" for User.email:String!
+}
+
+message Type {
+  string name = 1; // required; eg "User" for User.email:String!
+  repeated Field field = 2;
+}
+
+message MemStats {
+  uint64 total_bytes = 1; // MemStats.Sys
+  uint64 stack_bytes = 2; // MemStats.StackSys
+  uint64 heap_bytes = 3; // MemStats.HeapSys
+  uint64 heap_released_bytes = 13; // MemStats.HeapReleased
+  uint64 gc_overhead_bytes = 4; // MemStats.GCSys
+
+  uint64 stack_used_bytes = 5; // MemStats.StackInuse
+  uint64 heap_allocated_bytes = 6; // MemStats.HeapAlloc
+  uint64 heap_allocated_objects = 7; // MemStats.HeapObjects
+
+  uint64 heap_allocated_bytes_delta = 8; // MemStats.TotalAlloc delta
+  uint64 heap_allocated_objects_delta = 9; // MemStats.Mallocs delta
+  uint64 heap_freed_objects_delta = 10; // MemStats.Frees delta
+
+  uint64 gc_stw_ns_delta = 11; // MemStats.PauseTotalNs delta
+  uint64 gc_count_delta = 12; // MemStats.NumGC delta
+}
+
+message TimeStats {
+  uint64 uptime_ns = 1;
+  uint64 real_ns_delta = 2;
+  uint64 user_ns_delta = 3;
+  uint64 sys_ns_delta = 4;
+}
+
+// Top-level message type for the server-side stats endpoint
+message StatsReport {
+  ReportHeader header = 1; // required
+
+  // These fields are about properties of the engineproxy and are not generated
+  // from FullTracesReports.
+  MemStats mem_stats = 2;
+  TimeStats time_stats = 3;
+
+  // Beginning of the period over which stats are collected.
+  google.protobuf.Timestamp start_time = 8;
+  // End of the period of which stats are collected.
+  google.protobuf.Timestamp end_time = 9;
+  // Only used to interpret mem_stats and time_stats; not generated from
+  // FullTracesReports.
+  uint64 realtime_duration = 10;
+
+
+  // Maps from query descriptor to QueryStats. Required unless
+  // legacy_per_query_missing_operation_name is set. The keys are strings of the
+  // form `# operationName\nsignature` (literal hash and space), with
+  // operationName - if there is no operation name.
+  map<string, QueryStats> per_query = 14;
+
+  // Older agents (Go engineproxy) didn't explicitly include the operation name
+  // in the key of this map, and the server had to parse it out (after a
+  // re-signing operation which is no longer performed). The key here is just the query
+  // signature. Deprecated.
+  map<string, QueryStats> legacy_per_query_implicit_operation_name = 12;
+
+  // Deprecated: it was useful in Optics where we had access to the whole schema
+  // but has not been ever used in Engine. apollo-engine-reporting will not
+  // send it.
+  repeated Type type = 13;
+}
+
+// This is the top-level message used by the new traces ingress. This
+// is designed for the apollo-engine-reporting TypeScript agent and will
+// eventually be documented as a public ingress API. This message consists
+// solely of traces; the equivalent of the StatsReport is automatically
+// generated server-side from this message. Agents should send traces
+// for all requests in this report. Generally, buffering up until a large
+// size has been reached (say, 4MB) or 5-10 seconds has passed is appropriate.
+message FullTracesReport {
+  ReportHeader header = 1;
+
+  // key is statsReportKey (# operationName\nsignature) Note that the nested
+  // traces will *not* have a signature or details.operationName (because the
+  // key is adequate).
+  //
+  // We also assume that traces don't have
+  // legacy_per_query_implicit_operation_name, and we don't require them to have
+  // details.raw_query (which would consume a lot of space and has privacy/data
+  // access issues, and isn't currently exposed by our app anyway).
+  map<string, Traces> traces_per_query = 5;
+}
+
+// Just a sequence of traces with the same statsReportKey.
+message Traces {
+  repeated Trace trace = 1;
+}
+
+message TraceV1 {
+  ReportHeader header = 1;
+  Trace trace = 2;
+}
diff --git a/graphql-java-support/src/test/java/com/apollographql/federation/graphqljava/FederatedTracingInstrumentationTest.java b/graphql-java-support/src/test/java/com/apollographql/federation/graphqljava/FederatedTracingInstrumentationTest.java
new file mode 100644
index 00000000..2fefbe93
--- /dev/null
+++ b/graphql-java-support/src/test/java/com/apollographql/federation/graphqljava/FederatedTracingInstrumentationTest.java
@@ -0,0 +1,223 @@
+package com.apollographql.federation.graphqljava;
+
+import com.apollographql.federation.graphqljava.tracing.FederatedTracingInstrumentation;
+import com.apollographql.federation.graphqljava.tracing.HTTPRequestHeaders;
+import com.google.protobuf.InvalidProtocolBufferException;
+import graphql.ExecutionInput;
+import graphql.GraphQL;
+import graphql.GraphQLException;
+import graphql.schema.GraphQLSchema;
+import graphql.schema.idl.RuntimeWiring;
+import graphql.schema.idl.SchemaGenerator;
+import graphql.schema.idl.SchemaParser;
+import graphql.schema.idl.TypeDefinitionRegistry;
+import mdg.engine.proto.Reports;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.HashMap;
+import java.util.Map;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+class FederatedTracingInstrumentationTest {
+    private final String tracingSDL = TestUtils.readResource("schemas/tracing.graphql");
+    private GraphQL graphql;
+
+    @BeforeEach
+    void setupSchema() {
+        TypeDefinitionRegistry typeDefs = new SchemaParser().parse(tracingSDL);
+        RuntimeWiring resolvers = RuntimeWiring.newRuntimeWiring()
+                .type("Query", builder ->
+                        // return two items
+                        builder.dataFetcher("widgets", env -> {
+                            ArrayList<Object> objects = new ArrayList<>(2);
+                            objects.add(new Object());
+                            objects.add(new Object());
+                            return objects;
+                        }).dataFetcher("listOfLists", env -> {
+                            ArrayList<ArrayList<Object>> lists = new ArrayList<>(2);
+                            lists.add(new ArrayList<>(2));
+                            lists.add(new ArrayList<>(2));
+                            lists.get(0).add(new Object());
+                            lists.get(0).add(new Object());
+                            lists.get(1).add(new Object());
+                            lists.get(1).add(new Object());
+                            return lists;
+                        })
+                                .dataFetcher("listOfScalars", env -> new String[]{"one", "two", "three"}))
+                .type("Widget", builder ->
+                        // Widget.foo works normally, Widget.bar always throws an error
+                        builder.dataFetcher("foo", env -> "hello world")
+                                .dataFetcher("bar", env -> {
+                                    throw new GraphQLException("whoops");
+                                }))
+                .build();
+
+        GraphQLSchema graphQLSchema = new SchemaGenerator().makeExecutableSchema(typeDefs, resolvers);
+        graphql = GraphQL.newGraphQL(graphQLSchema)
+                .instrumentation(new FederatedTracingInstrumentation())
+                .build();
+    }
+
+    @Test
+    void testTracing() throws InvalidProtocolBufferException {
+        Map<String, Object> result = graphql.execute("{ widgets { foo, baz: bar }, listOfLists { foo }, listOfScalars }").toSpecification();
+
+        Object extensions = result.get("extensions");
+        assertTrue(extensions instanceof Map);
+
+        String ftv1 = ((Map) extensions).get("ftv1").toString();
+        byte[] decoded = Base64.getDecoder().decode(ftv1);
+
+        Reports.Trace trace = Reports.Trace.parseFrom(decoded);
+        assertTrue(trace.getStartTime().getSeconds() > 0, "Start time has seconds");
+        assertTrue(trace.getStartTime().getNanos() > 0, "Start time has nanoseconds");
+        assertTrue(trace.getEndTime().getSeconds() > 0, "End time has seconds");
+        assertTrue(trace.getEndTime().getNanos() > 0, "End time has nanoseconds");
+        assertTrue(trace.getDurationNs() > 0, "DurationNs is greater than zero");
+        assertEquals(3, trace.getRoot().getChildCount());
+
+        // widgets
+
+        Reports.Trace.Node widgets = trace.getRoot().getChild(0);
+        assertTrue(widgets.getStartTime() > 0, "Field start time is greater than zero");
+        assertTrue(widgets.getEndTime() > 0, "Field end time is greater than zero");
+        assertEquals("Query", widgets.getParentType());
+        assertEquals("[Widget!]", widgets.getType());
+        assertEquals("widgets", widgets.getResponseName());
+        assertEquals(2, widgets.getChildCount());
+
+        Reports.Trace.Node secondItem = widgets.getChild(1);
+        assertEquals(1, secondItem.getIndex());
+        assertEquals(2, secondItem.getChildCount());
+
+        Reports.Trace.Node foo = secondItem.getChild(0);
+        assertTrue(foo.getStartTime() > 0, "Field start time is greater than zero");
+        assertTrue(foo.getEndTime() > 0, "Field end time is greater than zero");
+        assertEquals("Widget", foo.getParentType());
+        assertEquals("String", foo.getType());
+        assertEquals("foo", foo.getResponseName());
+        assertEquals(0, foo.getErrorCount());
+
+        Reports.Trace.Node bar = secondItem.getChild(1);
+        assertTrue(bar.getStartTime() > 0, "Field start time is greater than zero");
+        assertTrue(bar.getEndTime() > 0, "Field end time is greater than zero");
+        assertEquals("Widget", bar.getParentType());
+        assertEquals("String", bar.getType());
+        assertEquals("baz", bar.getResponseName());
+        // Widget.bar is aliased as baz
+        assertEquals("bar", bar.getOriginalFieldName());
+        assertEquals(1, bar.getErrorCount());
+
+        Reports.Trace.Error error = bar.getError(0);
+        assertEquals("whoops", error.getMessage());
+        assertEquals(1, error.getLocationCount());
+        assertEquals(18, error.getLocation(0).getColumn());
+        assertEquals(1, error.getLocation(0).getLine());
+
+        // listOfLists
+
+        Reports.Trace.Node listOfLists = trace.getRoot().getChild(1);
+        assertEquals(0, listOfLists.getChild(0).getIndex());
+        assertEquals(2, listOfLists.getChild(0).getChildCount());
+        assertEquals(1, listOfLists.getChild(1).getIndex());
+        assertEquals(2, listOfLists.getChild(1).getChildCount());
+
+        assertEquals(0, listOfLists.getChild(0).getChild(0).getIndex());
+        assertEquals(1, listOfLists.getChild(0).getChild(0).getChildCount());
+        assertEquals(1, listOfLists.getChild(0).getChild(1).getIndex());
+        assertEquals(1, listOfLists.getChild(0).getChild(1).getChildCount());
+
+        Reports.Trace.Node deeplyNestedFoo = listOfLists.getChild(0).getChild(0).getChild(0);
+        assertTrue(deeplyNestedFoo.getStartTime() > 0, "Field start time is greater than zero");
+        assertTrue(deeplyNestedFoo.getEndTime() > 0, "Field end time is greater than zero");
+        assertEquals("Widget", deeplyNestedFoo.getParentType());
+        assertEquals("String", deeplyNestedFoo.getType());
+        assertEquals("foo", deeplyNestedFoo.getResponseName());
+        assertEquals(0, deeplyNestedFoo.getErrorCount());
+
+        // listOfScalars
+
+        Reports.Trace.Node listOfScalars = trace.getRoot().getChild(2);
+        assertTrue(listOfScalars.getStartTime() > 0, "Field start time is greater than zero");
+        assertTrue(listOfScalars.getEndTime() > 0, "Field end time is greater than zero");
+        assertEquals("Query", listOfScalars.getParentType());
+        assertEquals("[String!]!", listOfScalars.getType());
+        assertEquals("listOfScalars", listOfScalars.getResponseName());
+    }
+
+    @Test
+    void testTracingParseErrors() throws InvalidProtocolBufferException {
+        Map<String, Object> result = graphql.execute("{ widgets { foo }").toSpecification();
+
+        Object extensions = result.get("extensions");
+        assertTrue(extensions instanceof Map);
+
+        String ftv1 = ((Map) extensions).get("ftv1").toString();
+        byte[] decoded = Base64.getDecoder().decode(ftv1);
+
+        Reports.Trace trace = Reports.Trace.parseFrom(decoded);
+        assertEquals(1, trace.getRoot().getErrorCount());
+        Reports.Trace.Error error = trace.getRoot().getError(0);
+        assertEquals("Invalid Syntax : offending token '' at line 1 column 18", error.getMessage());
+        assertEquals(1, error.getLocation(0).getLine());
+        assertEquals(18, error.getLocation(0).getColumn());
+    }
+
+    @Test
+    void testTracingValidationErrors() throws InvalidProtocolBufferException {
+        Map<String, Object> result = graphql.execute("{ widgets { notARealThing } }").toSpecification();
+
+        Object extensions = result.get("extensions");
+        assertTrue(extensions instanceof Map);
+
+        String ftv1 = ((Map) extensions).get("ftv1").toString();
+        byte[] decoded = Base64.getDecoder().decode(ftv1);
+
+        Reports.Trace trace = Reports.Trace.parseFrom(decoded);
+        assertEquals(1, trace.getRoot().getErrorCount());
+        Reports.Trace.Error error = trace.getRoot().getError(0);
+        assertEquals("Validation error of type FieldUndefined: Field 'notARealThing' in type 'Widget' is undefined @ 'widgets/notARealThing'", error.getMessage());
+        assertEquals(1, error.getLocation(0).getLine());
+        assertEquals(13, error.getLocation(0).getColumn());
+    }
+
+    @Test
+    void testOptions() {
+        // This test primarily exists so that IntelliJ doesn't encourage you to take 'public' off
+        // of these two methods.
+        assertFalse(FederatedTracingInstrumentation.Options.newOptions().isDebuggingEnabled());
+    }
+
+    @Test
+    void testHTTPHeaders() {
+        Map<String, String> headers = new HashMap<>();
+        HTTPRequestHeaders context = headers::get;
+        ExecutionInput input = ExecutionInput.newExecutionInput("{widgets {foo}}")
+                .context(context)
+                .build();
+
+        // Because the context implements HTTPRequestHeaders but the special header isn't there,
+        // we don't get the trace extension (or any extensions).
+        Map<String, Object> result = graphql.execute(input).toSpecification();
+        assertNull(result.get("extensions"));
+
+        // Try again with the header having the wrong value.
+        headers.put("apollo-federation-include-trace", "bla");
+        result = graphql.execute(input).toSpecification();
+        assertNull(result.get("extensions"));
+
+        // Now with the right value.
+        headers.put("apollo-federation-include-trace", "ftv1");
+        result = graphql.execute(input).toSpecification();
+        Object extensions = result.get("extensions");
+        assertTrue(extensions instanceof Map);
+        assertTrue(((Map) extensions).containsKey("ftv1"));
+    }
+}
diff --git a/graphql-java-support/src/test/resources/com/apollographql/federation/graphqljava/schemas/tracing.graphql b/graphql-java-support/src/test/resources/com/apollographql/federation/graphqljava/schemas/tracing.graphql
new file mode 100644
index 00000000..3b92252f
--- /dev/null
+++ b/graphql-java-support/src/test/resources/com/apollographql/federation/graphqljava/schemas/tracing.graphql
@@ -0,0 +1,10 @@
+type Query {
+    widgets: [Widget!]
+    listOfLists: [[Widget!]!]
+    listOfScalars: [String!]!
+}
+
+type Widget {
+    foo: String
+    bar: String
+}
diff --git a/spring-example/src/main/java/com/apollographql/federation/springexample/App.java b/spring-example/src/main/java/com/apollographql/federation/springexample/App.java
index 0896b2c7..0dcc4b29 100644
--- a/spring-example/src/main/java/com/apollographql/federation/springexample/App.java
+++ b/spring-example/src/main/java/com/apollographql/federation/springexample/App.java
@@ -1,11 +1,19 @@
 package com.apollographql.federation.springexample;
 
+import com.apollographql.federation.graphqljava.tracing.FederatedTracingInstrumentation;
+import graphql.execution.instrumentation.Instrumentation;
 import org.springframework.boot.SpringApplication;
 import org.springframework.boot.autoconfigure.SpringBootApplication;
+import org.springframework.context.annotation.Bean;
 
 @SpringBootApplication
 public class App {
     public static void main(String[] args) {
         SpringApplication.run(App.class, args);
     }
+
+    @Bean
+    public Instrumentation addFederatedTracing() {
+        return new FederatedTracingInstrumentation(new FederatedTracingInstrumentation.Options(true));
+    }
 }