diff --git a/.eslintrc.cjs b/.eslintrc.cjs index 07a8efec45..dfd9cd09f8 100644 --- a/.eslintrc.cjs +++ b/.eslintrc.cjs @@ -33,6 +33,23 @@ module.exports = { }, overrides: [ ...overrides, + { + files: ['src/run/**'], + rules: { + 'no-restricted-imports': [ + 'error', + { + paths: [ + { + name: '@opentelemetry/api', + importNames: ['trace'], + message: 'Please use `getTracer()` from `./handlers/tracer.cjs` instead', + }, + ], + }, + ], + }, + }, { files: ['src/run/handlers/**'], rules: { diff --git a/src/build/templates/handler-monorepo.tmpl.js b/src/build/templates/handler-monorepo.tmpl.js index 106db0c3df..bc1231ee16 100644 --- a/src/build/templates/handler-monorepo.tmpl.js +++ b/src/build/templates/handler-monorepo.tmpl.js @@ -1,4 +1,9 @@ -import tracing, { trace } from '{{cwd}}/.netlify/dist/run/handlers/tracing.js' +import { + createRequestContext, + runWithRequestContext, +} from '{{cwd}}/.netlify/dist/run/handlers/request-context.cjs' +import { getTracer } from '{{cwd}}/.netlify/dist/run/handlers/tracer.cjs' +import tracing from '{{cwd}}/.netlify/dist/run/handlers/tracing.js' process.chdir('{{cwd}}') @@ -8,10 +13,11 @@ export default async function (req, context) { tracing.start() } - /** @type {import('@opentelemetry/api').Tracer} */ - const tracer = trace.getTracer('Next.js Runtime') - return tracer.startActiveSpan('Next.js Server Handler', async (span) => { - try { + const requestContext = createRequestContext(req.headers.get('x-nf-debug-logging')) + const tracer = getTracer() + + const handlerResponse = await runWithRequestContext(requestContext, () => { + return tracer.withActiveSpan('Next.js Server Handler', async (span) => { span.setAttributes({ 'account.id': context.account.id, 'deploy.id': context.deploy.id, @@ -31,16 +37,14 @@ export default async function (req, context) { 'http.status_code': response.status, }) return response - } catch (error) { - span.recordException(error) - if (error instanceof Error) { - span.addEvent({ name: error.name, message: error.message }) - } - throw error - } finally { - span.end() - } + }) }) + + if (requestContext.serverTiming) { + handlerResponse.headers.set('server-timing', requestContext.serverTiming) + } + + return handlerResponse } export const config = { diff --git a/src/build/templates/handler.tmpl.js b/src/build/templates/handler.tmpl.js index 1453d127a1..0f8e554938 100644 --- a/src/build/templates/handler.tmpl.js +++ b/src/build/templates/handler.tmpl.js @@ -1,15 +1,21 @@ +import { + createRequestContext, + runWithRequestContext, +} from './.netlify/dist/run/handlers/request-context.cjs' import serverHandler from './.netlify/dist/run/handlers/server.js' -import tracing, { trace } from './.netlify/dist/run/handlers/tracing.js' +import { getTracer } from './.netlify/dist/run/handlers/tracer.cjs' +import tracing from './.netlify/dist/run/handlers/tracing.js' export default async function handler(req, context) { if (process.env.NETLIFY_OTLP_TRACE_EXPORTER_URL) { tracing.start() } - /** @type {import('@opentelemetry/api').Tracer} */ - const tracer = trace.getTracer('Next.js Runtime') - return tracer.startActiveSpan('Next.js Server Handler', async (span) => { - try { + const requestContext = createRequestContext(req.headers.get('x-nf-debug-logging')) + const tracer = getTracer() + + const handlerResponse = await runWithRequestContext(requestContext, () => { + return tracer.withActiveSpan('Next.js Server Handler', async (span) => { span.setAttributes({ 'account.id': context.account.id, 'deploy.id': context.deploy.id, @@ -25,16 +31,14 @@ export default async function handler(req, context) { 'http.status_code': response.status, }) return response - } catch (error) { - span.recordException(error) - if (error instanceof Error) { - span.addEvent({ name: error.name, message: error.message }) - } - throw error - } finally { - span.end() - } + }) }) + + if (requestContext.serverTiming) { + handlerResponse.headers.set('server-timing', requestContext.serverTiming) + } + + return handlerResponse } export const config = { diff --git a/src/run/handlers/cache.cts b/src/run/handlers/cache.cts index a3c7d562eb..70ae9c6421 100644 --- a/src/run/handlers/cache.cts +++ b/src/run/handlers/cache.cts @@ -5,7 +5,7 @@ import { Buffer } from 'node:buffer' import { getDeployStore, Store } from '@netlify/blobs' import { purgeCache } from '@netlify/functions' -import { trace, type Span, SpanStatusCode } from '@opentelemetry/api' +import { type Span } from '@opentelemetry/api' import { NEXT_CACHE_TAGS_HEADER } from 'next/dist/lib/constants.js' import type { CacheHandler, @@ -15,6 +15,7 @@ import type { } from 'next/dist/server/lib/incremental-cache/index.js' import { getRequestContext } from './request-context.cjs' +import { getTracer } from './tracer.cjs' type TagManifest = { revalidatedAt: number } @@ -26,7 +27,7 @@ export class NetlifyCacheHandler implements CacheHandler { options: CacheHandlerContext revalidatedTags: string[] blobStore: Store - tracer = trace.getTracer('Netlify Cache Handler') + tracer = getTracer() tagManifestsFetchedFromBlobStoreInCurrentRequest: TagManifestBlobCache constructor(options: CacheHandlerContext) { @@ -90,77 +91,67 @@ export class NetlifyCacheHandler implements CacheHandler { } async get(...args: Parameters): ReturnType { - return this.tracer.startActiveSpan('get cache key', async (span) => { - try { - const [key, ctx = {}] = args - console.debug(`[NetlifyCacheHandler.get]: ${key}`) - - const blobKey = await this.encodeBlobKey(key) - span.setAttributes({ key, blobKey }) - const blob = (await this.blobStore.get(blobKey, { - type: 'json', - })) as CacheHandlerValue | null + return this.tracer.withActiveSpan('get cache key', async (span) => { + const [key, ctx = {}] = args + console.debug(`[NetlifyCacheHandler.get]: ${key}`) - // if blob is null then we don't have a cache entry - if (!blob) { - span.addEvent('Cache miss', { key, blobKey }) - return null - } + const blobKey = await this.encodeBlobKey(key) + span.setAttributes({ key, blobKey }) - const staleByTags = await this.checkCacheEntryStaleByTags(blob, ctx.tags, ctx.softTags) + const blob = (await this.tracer.withActiveSpan('blobStore.get', async (blobGetSpan) => { + blobGetSpan.setAttributes({ key, blobKey }) + return await this.blobStore.get(blobKey, { + type: 'json', + }) + })) as CacheHandlerValue | null - if (staleByTags) { - span.addEvent('Stale', { staleByTags }) - return null - } + // if blob is null then we don't have a cache entry + if (!blob) { + span.addEvent('Cache miss', { key, blobKey }) + return null + } - this.captureResponseCacheLastModified(blob, key, span) + const staleByTags = await this.checkCacheEntryStaleByTags(blob, ctx.tags, ctx.softTags) - switch (blob.value?.kind) { - case 'FETCH': - span.addEvent('FETCH', { lastModified: blob.lastModified, revalidate: ctx.revalidate }) - return { - lastModified: blob.lastModified, - value: blob.value, - } - - case 'ROUTE': - span.addEvent('ROUTE', { lastModified: blob.lastModified, status: blob.value.status }) - return { - lastModified: blob.lastModified, - value: { - ...blob.value, - body: Buffer.from(blob.value.body as unknown as string, 'base64'), - }, - } - case 'PAGE': - span.addEvent('PAGE', { lastModified: blob.lastModified }) - return { - lastModified: blob.lastModified, - value: blob.value, - } - default: - span.recordException(new Error(`Unknown cache entry kind: ${blob.value?.kind}`)) - // TODO: system level logging not implemented - } + if (staleByTags) { + span.addEvent('Stale', { staleByTags }) return null - } catch (error) { - if (error instanceof Error) { - span.recordException(error) - } - span.setStatus({ - code: SpanStatusCode.ERROR, - message: error instanceof Error ? error.message : String(error), - }) - throw error - } finally { - span.end() } + + this.captureResponseCacheLastModified(blob, key, span) + + switch (blob.value?.kind) { + case 'FETCH': + span.addEvent('FETCH', { lastModified: blob.lastModified, revalidate: ctx.revalidate }) + return { + lastModified: blob.lastModified, + value: blob.value, + } + + case 'ROUTE': + span.addEvent('ROUTE', { lastModified: blob.lastModified, status: blob.value.status }) + return { + lastModified: blob.lastModified, + value: { + ...blob.value, + body: Buffer.from(blob.value.body as unknown as string, 'base64'), + }, + } + case 'PAGE': + span.addEvent('PAGE', { lastModified: blob.lastModified }) + return { + lastModified: blob.lastModified, + value: blob.value, + } + default: + span.recordException(new Error(`Unknown cache entry kind: ${blob.value?.kind}`)) + } + return null }) } async set(...args: Parameters) { - return this.tracer.startActiveSpan('set cache key', async (span) => { + return this.tracer.withActiveSpan('set cache key', async (span) => { const [key, data] = args const blobKey = await this.encodeBlobKey(key) const lastModified = Date.now() @@ -189,7 +180,6 @@ export class NetlifyCacheHandler implements CacheHandler { }) } } - span.end() }) } @@ -264,22 +254,9 @@ export class NetlifyCacheHandler implements CacheHandler { if (!tagManifestPromise) { tagManifestPromise = this.encodeBlobKey(tag).then((blobKey) => { - return this.tracer.startActiveSpan(`get tag manifest`, async (span) => { + return this.tracer.withActiveSpan(`get tag manifest`, async (span) => { span.setAttributes({ tag, blobKey }) - try { - return await this.blobStore.get(blobKey, { type: 'json' }) - } catch (error) { - if (error instanceof Error) { - span.recordException(error) - } - span.setStatus({ - code: SpanStatusCode.ERROR, - message: error instanceof Error ? error.message : String(error), - }) - throw error - } finally { - span.end() - } + return this.blobStore.get(blobKey, { type: 'json' }) }) }) diff --git a/src/run/handlers/request-context.cts b/src/run/handlers/request-context.cts index 95205b330c..a2d9902214 100644 --- a/src/run/handlers/request-context.cts +++ b/src/run/handlers/request-context.cts @@ -1,16 +1,20 @@ import { AsyncLocalStorage } from 'node:async_hooks' export type RequestContext = { + debug: boolean responseCacheGetLastModified?: number responseCacheKey?: string usedFsRead?: boolean didPagesRouterOnDemandRevalidate?: boolean + serverTiming?: string } type RequestContextAsyncLocalStorage = AsyncLocalStorage -export function createRequestContext(): RequestContext { - return {} +export function createRequestContext(debug = false): RequestContext { + return { + debug, + } } const REQUEST_CONTEXT_GLOBAL_KEY = Symbol.for('nf-request-context-async-local-storage') diff --git a/src/run/handlers/server.ts b/src/run/handlers/server.ts index 443b7e2373..4e756d5ebb 100644 --- a/src/run/handlers/server.ts +++ b/src/run/handlers/server.ts @@ -1,7 +1,6 @@ import type { OutgoingHttpHeaders } from 'http' import { toComputeResponse, toReqRes, ComputeJsOutgoingMessage } from '@fastly/http-compute-js' -import { SpanStatusCode, trace } from '@opentelemetry/api' import type { NextConfigComplete } from 'next/dist/server/config-shared.js' import type { WorkerRequestHandler } from 'next/dist/server/lib/types.js' @@ -16,7 +15,8 @@ import { import { nextResponseProxy } from '../revalidate.js' import { logger } from '../systemlog.js' -import { createRequestContext, runWithRequestContext } from './request-context.cjs' +import { createRequestContext, getRequestContext } from './request-context.cjs' +import { getTracer } from './tracer.cjs' let nextHandler: WorkerRequestHandler, nextConfig: NextConfigComplete, tagsManifest: TagsManifest @@ -44,10 +44,10 @@ const disableFaultyTransferEncodingHandling = (res: ComputeJsOutgoingMessage) => } export default async (request: Request) => { - const tracer = trace.getTracer('Next.js Runtime') + const tracer = getTracer() if (!nextHandler) { - await tracer.startActiveSpan('initialize next server', async (span) => { + await tracer.withActiveSpan('initialize next server', async (span) => { // set the server config const { getRunConfig, setRunConfig } = await import('../config.js') nextConfig = await getRunConfig() @@ -72,35 +72,26 @@ export default async (request: Request) => { dir: process.cwd(), isDev: false, }) - span.end() }) } - return await tracer.startActiveSpan('generate response', async (span) => { + return await tracer.withActiveSpan('generate response', async (span) => { const { req, res } = toReqRes(request) - const requestContext = createRequestContext() - disableFaultyTransferEncodingHandling(res as unknown as ComputeJsOutgoingMessage) + const requestContext = getRequestContext() ?? createRequestContext() + const resProxy = nextResponseProxy(res, requestContext) // We don't await this here, because it won't resolve until the response is finished. - const nextHandlerPromise = runWithRequestContext(requestContext, () => - nextHandler(req, resProxy).catch((error) => { - logger.withError(error).error('next handler error') - console.error(error) - resProxy.statusCode = 500 - span.recordException(error) - span.setAttribute('http.status_code', 500) - span.setStatus({ - code: SpanStatusCode.ERROR, - message: error instanceof Error ? error.message : String(error), - }) - span.end() - resProxy.end('Internal Server Error') - }), - ) + const nextHandlerPromise = nextHandler(req, resProxy).catch((error) => { + logger.withError(error).error('next handler error') + console.error(error) + resProxy.statusCode = 500 + span.setAttribute('http.status_code', 500) + resProxy.end('Internal Server Error') + }) // Contrary to the docs, this resolves when the headers are available, not when the stream closes. // See https://github.com/fastly/http-compute-js/blob/main/src/http-compute-js/http-server.ts#L168-L173 @@ -125,7 +116,6 @@ export default async (request: Request) => { // TODO: Remove once a fix has been rolled out. if ((response.status > 300 && response.status < 400) || response.status >= 500) { const body = await response.text() - span.end() return new Response(body || null, response) } @@ -133,9 +123,7 @@ export default async (request: Request) => { flush() { // it's important to keep the stream open until the next handler has finished, // or otherwise the cache revalidates might not go through - return nextHandlerPromise.then(() => { - span.end() - }) + return nextHandlerPromise }, }) diff --git a/src/run/handlers/tracer.cts b/src/run/handlers/tracer.cts new file mode 100644 index 0000000000..9c8cb85c82 --- /dev/null +++ b/src/run/handlers/tracer.cts @@ -0,0 +1,91 @@ +// Here we need to actually import `trace` from @opentelemetry/api to add extra wrappers +// other places should import `getTracer` from this module +// eslint-disable-next-line no-restricted-imports +import { trace, Tracer, Span } from '@opentelemetry/api' +import { SugaredTracer, wrapTracer } from '@opentelemetry/api/experimental' + +import { RequestContext, getRequestContext } from './request-context.cjs' + +const spanMeta = new WeakMap() +const spanCounter = new WeakMap() + +function spanHook(span: Span): Span { + const originalEnd = span.end.bind(span) + + span.end = (endTime) => { + originalEnd(endTime) + + const meta = spanMeta.get(span) + if (meta) { + const requestContext = getRequestContext() + if (requestContext?.debug) { + const duration = (typeof endTime === 'number' ? endTime : performance.now()) - meta.start + + const serverTiming = requestContext.serverTiming ?? '' + const currentRequestSpanCounter = spanCounter.get(requestContext) ?? 1 + + requestContext.serverTiming = `${serverTiming}${serverTiming.length === 0 ? '' : ', '}s${currentRequestSpanCounter};dur=${duration};desc="${meta.name}"` + + spanCounter.set(requestContext, currentRequestSpanCounter + 1) + } + } + + spanMeta.delete(span) + } + + return span +} + +// startSpan and startActiveSpan don't automatically handle span ending and error handling +// so this typing just tries to enforce not using those methods in our code +// we should be using withActiveSpan (and optionally withSpan) instead +export type RuntimeTracer = Omit + +let tracer: RuntimeTracer | undefined + +export function getTracer(): RuntimeTracer { + if (!tracer) { + const baseTracer = trace.getTracer('Next.js Runtime') + + // we add hooks to capture span start and end events to be able to add server-timings + // while preserving OTEL api + const startSpan = baseTracer.startSpan.bind(baseTracer) + baseTracer.startSpan = ( + ...args: Parameters + ): ReturnType => { + const span = startSpan(...args) + spanMeta.set(span, { start: performance.now(), name: args[0] }) + return spanHook(span) + } + + const startActiveSpan = baseTracer.startActiveSpan.bind(baseTracer) + + // @ts-expect-error Target signature provides too few arguments. Expected 4 or more, but got 2. + baseTracer.startActiveSpan = ( + ...args: Parameters + ): ReturnType => { + const [name, ...restOfArgs] = args + + const augmentedArgs = restOfArgs.map((arg) => { + // callback might be 2nd, 3rd or 4th argument depending on used signature + // only callback can be a function so target that and keep rest arguments as-is + if (typeof arg === 'function') { + return (span: Span) => { + spanMeta.set(span, { start: performance.now(), name: args[0] }) + spanHook(span) + return arg(span) + } + } + + return arg + }) as typeof restOfArgs + + return startActiveSpan(name, ...augmentedArgs) + } + + // finally use SugaredTracer + tracer = wrapTracer(baseTracer) + } + + return tracer +} diff --git a/src/run/handlers/tracing.ts b/src/run/handlers/tracing.ts index edd1c0c1d5..5942b8c4a6 100644 --- a/src/run/handlers/tracing.ts +++ b/src/run/handlers/tracing.ts @@ -23,7 +23,6 @@ const sdk = new NodeSDK({ ), }) export default sdk -export { trace } from '@opentelemetry/api' // gracefully shut down the SDK on process exit process.on('SIGTERM', () => { diff --git a/src/run/headers.ts b/src/run/headers.ts index ce5ad20af7..e2f5ea092f 100644 --- a/src/run/headers.ts +++ b/src/run/headers.ts @@ -1,11 +1,12 @@ import { getDeployStore } from '@netlify/blobs' -import type { Span, Tracer } from '@opentelemetry/api' +import type { Span } from '@opentelemetry/api' import type { NextConfigComplete } from 'next/dist/server/config-shared.js' import { encodeBlobKey } from '../shared/blobkey.js' import type { TagsManifest } from './config.js' import type { RequestContext } from './handlers/request-context.cjs' +import type { RuntimeTracer } from './handlers/tracer.cjs' interface NetlifyVaryValues { headers: string[] @@ -95,7 +96,7 @@ export const adjustDateHeader = async ({ headers: Headers request: Request span: Span - tracer: Tracer + tracer: RuntimeTracer requestContext: RequestContext }) => { const cacheState = headers.get('x-nextjs-cache') @@ -132,7 +133,7 @@ export const adjustDateHeader = async ({ const blobStore = getDeployStore({ fetch: fetchBeforeNextPatchedIt, consistency: 'strong' }) // TODO: use metadata for this - lastModified = await tracer.startActiveSpan( + lastModified = await tracer.withActiveSpan( 'get cache to calculate date header', async (getBlobForDateSpan) => { getBlobForDateSpan.setAttributes({ @@ -142,7 +143,6 @@ export const adjustDateHeader = async ({ const blob = (await blobStore.get(blobKey, { type: 'json' })) ?? {} getBlobForDateSpan.addEvent(blob ? 'Cache hit' : 'Cache miss') - getBlobForDateSpan.end() return blob.lastModified }, ) diff --git a/src/run/next.cts b/src/run/next.cts index 09f77a94c0..4a5a3e2eb2 100644 --- a/src/run/next.cts +++ b/src/run/next.cts @@ -2,20 +2,22 @@ import fs from 'fs/promises' import { relative, resolve } from 'path' import { getDeployStore } from '@netlify/blobs' -import { trace } from '@opentelemetry/api' // @ts-expect-error no types installed import { patchFs } from 'fs-monkey' -import type { getRequestHandlers } from 'next/dist/server/lib/start-server.js' +import type { getRequestHandlers as GetRequestHandlersSignature } from 'next/dist/server/lib/start-server.js' import { getRequestContext } from './handlers/request-context.cjs' +import { getTracer } from './handlers/tracer.cjs' type FS = typeof import('fs') const fetchBeforeNextPatchedIt = globalThis.fetch -export async function getMockedRequestHandlers(...args: Parameters) { - const tracer = trace.getTracer('Next.js Runtime') - return tracer.startActiveSpan('mocked request handler', async (span) => { +export async function getMockedRequestHandlers( + ...args: Parameters +) { + const tracer = getTracer() + return tracer.withActiveSpan('mocked request handler', async () => { const ofs = { ...fs } const { encodeBlobKey } = await import('../shared/blobkey.js') @@ -55,11 +57,11 @@ export async function getMockedRequestHandlers(...args: Parameters import('next/dist/server/lib/start-server.js'), + ) + return getRequestHandlers(...args) }) } diff --git a/tests/integration/request-context.test.ts b/tests/integration/request-context.test.ts index a7e5b173e2..04eec98609 100644 --- a/tests/integration/request-context.test.ts +++ b/tests/integration/request-context.test.ts @@ -132,22 +132,24 @@ describe('request-context does NOT leak between concurrent requests', () => { }) // fastCall finished completely so it should have acquired request context - expect(getRequestContextSpy).toHaveBeenCalledTimes(1) - expect(getRequestContextSpy).toHaveNthReturnedWith(1, { - responseCacheGetLastModified: new Date(mockedDateForRevalidateAutomatic).getTime(), - responseCacheKey: '/static/revalidate-automatic', - }) + expect(getRequestContextSpy).toHaveLastReturnedWith( + expect.objectContaining({ + responseCacheGetLastModified: new Date(mockedDateForRevalidateAutomatic).getTime(), + responseCacheKey: '/static/revalidate-automatic', + }), + ) // second request finished - now we can unpause the first one unpauseSlowCall() const slowCall = await slowCallPromise // slowCall finished completely so it should have acquired request context - expect(getRequestContextSpy).toHaveBeenCalledTimes(2) - expect(getRequestContextSpy).toHaveNthReturnedWith(2, { - responseCacheGetLastModified: new Date(mockedDateForRevalidateSlow).getTime(), - responseCacheKey: '/static/revalidate-slow', - }) + expect(getRequestContextSpy).toHaveLastReturnedWith( + expect.objectContaining({ + responseCacheGetLastModified: new Date(mockedDateForRevalidateSlow).getTime(), + responseCacheKey: '/static/revalidate-slow', + }), + ) expect(slowCall.headers['date']).toBe(mockedDateForRevalidateSlow) expect(fastCall.headers['date']).toBe(mockedDateForRevalidateAutomatic) @@ -205,22 +207,24 @@ describe('request-context does NOT leak between concurrent requests', () => { }) // fastCall finished completely so it should have acquired request context - expect(getRequestContextSpy).toHaveBeenCalledTimes(1) - expect(getRequestContextSpy).toHaveNthReturnedWith(1, { - responseCacheGetLastModified: new Date(mockedDateForStaticFetch1).getTime(), - responseCacheKey: '/static-fetch/1', - }) + expect(getRequestContextSpy).toHaveLastReturnedWith( + expect.objectContaining({ + responseCacheGetLastModified: new Date(mockedDateForStaticFetch1).getTime(), + responseCacheKey: '/static-fetch/1', + }), + ) // second request finished - now we can unpause the first one unpauseSlowCall() const slowCall = await slowCallPromise // slowCall finished completely so it should have acquired request context - expect(getRequestContextSpy).toHaveBeenCalledTimes(2) - expect(getRequestContextSpy).toHaveNthReturnedWith(2, { - responseCacheGetLastModified: new Date(mockedDateForStaticFetch2).getTime(), - responseCacheKey: '/static-fetch/2', - }) + expect(getRequestContextSpy).toHaveLastReturnedWith( + expect.objectContaining({ + responseCacheGetLastModified: new Date(mockedDateForStaticFetch2).getTime(), + responseCacheKey: '/static-fetch/2', + }), + ) expect(slowCall.headers['date']).toBe(mockedDateForStaticFetch2) expect(fastCall.headers['date']).toBe(mockedDateForStaticFetch1)