Skip to content

Commit

Permalink
feat(otlp-exporter-base): add retries (#3207)
Browse files Browse the repository at this point in the history
* feat(otlp-exporter-base): add retries to sendWithHttp

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add tests and update abort logic

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): fix lint

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add retry test

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add retry to browser exporter and add tests

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): refactor

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add jitter

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): initialize reqIsDestroyed to false

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add throttle logic

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add retry to readme

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): add changelog

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): update throttle time function

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): refactor sec difference in throttle fun

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): fix lint

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): fix lint

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): fix lint

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): refactor retrieve throttle time func

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): fix lint

Signed-off-by: Svetlana Brennan <[email protected]>

* feat(otlp-exporter-base): move parseRetryAfterToMills to utils file

Signed-off-by: Svetlana Brennan <[email protected]>

---------

Signed-off-by: Svetlana Brennan <[email protected]>
Co-authored-by: Chengzhong Wu <[email protected]>
  • Loading branch information
svetlanabrennan and legendecas committed Feb 28, 2023
1 parent 708afd0 commit abfe059
Show file tree
Hide file tree
Showing 10 changed files with 419 additions and 117 deletions.
1 change: 1 addition & 0 deletions experimental/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ All notable changes to experimental packages in this project will be documented
* deps: remove unused proto-loader dependencies and update grpc-js and proto-loader versions [#3337](https://github.com/open-telemetry/opentelemetry-js/pull/3337) @seemk
* feat(metrics-exporters): configure temporality via environment variable [#3305](https://github.com/open-telemetry/opentelemetry-js/pull/3305) @pichlermarc
* feat(console-metric-exporter): add temporality configuration [#3387](https://github.com/open-telemetry/opentelemetry-js/pull/3387) @pichlermarc
* feat(otlp-exporter-base): add retries [#3207](https://github.com/open-telemetry/opentelemetry-js/pull/3207) @svetlanabrennan

### :bug: (Bug Fix)

Expand Down
15 changes: 15 additions & 0 deletions experimental/packages/exporter-trace-otlp-http/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,21 @@ To override the default timeout duration, use the following options:

> Providing `timeoutMillis` with `collectorOptions` takes precedence and overrides timeout set with environment variables.

## OTLP Exporter Retry

OTLP requires that transient errors be handled with a [retry strategy](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#retry).

This retry policy uses the following configuration, which currently cannot be customized:

+ `DEFAULT_EXPORT_MAX_ATTEMPTS`: The maximum number of attempts, including the original request. Defaults to 5.
+ `DEFAULT_EXPORT_INITIAL_BACKOFF`: The initial backoff duration. Defaults to 1 second.
+ `DEFAULT_EXPORT_MAX_BACKOFF`: The maximum backoff duration. Defaults to 5 seconds.
+ `DEFAULT_EXPORT_BACKOFF_MULTIPLIER`: The backoff multiplier. Defaults to 1.5.

This retry policy first checks whether the response has a `'Retry-After'` header. If it does, the exporter waits for the time specified in that header before retrying. Otherwise, the exporter retries using exponential backoff with jitter.

> The exporter only keeps retrying until the [exporter timeout](#exporter-timeout-configuration) elapses.

## Running opentelemetry-collector locally to see the traces

1. Go to `examples/otlp-exporter-node`
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -582,3 +582,123 @@ describe('when configuring via environment', () => {
envSource.OTEL_EXPORTER_OTLP_HEADERS = '';
});
});

describe('export with retry - real http request destroyed', () => {
  let server: any;
  let collectorTraceExporter: OTLPTraceExporter;
  let collectorExporterConfig: OTLPExporterConfigBase;
  let spans: ReadableSpan[];

  /**
   * Shared scenario for every test in this suite: answer each request to the
   * traces endpoint through `respond`, export a single mocked span, and verify
   * that the export fails with 'Request Timeout' after exactly
   * `expectedRequests` requests reached the fake server.
   *
   * @param respond callback that sends the fake collector response for a request
   * @param expectedRequests number of requests the fake server should have seen
   * @param done Mocha completion callback, invoked after all assertions pass
   */
  const expectRequestTimeout = (
    respond: (xhr: any) => void,
    expectedRequests: number,
    done: () => void
  ): void => {
    spans = [Object.assign({}, mockedReadableSpan)];

    let requestCount = 0;
    server.respondWith('http://localhost:4318/v1/traces', (xhr: any) => {
      requestCount++;
      respond(xhr);
    });

    collectorTraceExporter.export(spans, result => {
      assert.strictEqual(result.code, core.ExportResultCode.FAILED);
      const error = result.error as OTLPExporterError;
      assert.ok(error !== undefined);
      assert.strictEqual(error.message, 'Request Timeout');
      assert.strictEqual(requestCount, expectedRequests);
      done();
    });
  };

  /** Returns a Date that lies `seconds` seconds after the current time. */
  const dateInSeconds = (seconds: number): Date => {
    const date = new Date();
    date.setSeconds(date.getSeconds() + seconds);
    return date;
  };

  beforeEach(() => {
    // Fake XHR server that answers requests automatically.
    server = sinon.fakeServer.create({ autoRespond: true });
    // Every export in this suite runs with a 1500 ms exporter timeout.
    collectorExporterConfig = { timeoutMillis: 1500 };
  });

  afterEach(() => {
    server.restore();
  });

  describe('when "sendBeacon" is NOT available', () => {
    beforeEach(() => {
      (window.navigator as any).sendBeacon = false;
      collectorTraceExporter = new OTLPTraceExporter(collectorExporterConfig);
    });

    it('should log the timeout request error message when retrying with exponential backoff with jitter', done => {
      // No Retry-After header: only the initial request is expected.
      expectRequestTimeout(xhr => xhr.respond(503), 1, done);
    }).timeout(3000);

    it('should log the timeout request error message when retry-after header is set to 3 seconds', done => {
      expectRequestTimeout(
        xhr => xhr.respond(503, { 'Retry-After': 3 }),
        1,
        done
      );
    }).timeout(3000);

    it('should log the timeout request error message when retry-after header is a date', done => {
      // A date one second ahead: the test expects a second request.
      expectRequestTimeout(
        xhr => xhr.respond(503, { 'Retry-After': dateInSeconds(1) }),
        2,
        done
      );
    }).timeout(3000);

    it('should log the timeout request error message when retry-after header is a date with long delay', done => {
      // A date 120 seconds ahead: only the initial request is expected.
      expectRequestTimeout(
        xhr => xhr.respond(503, { 'Retry-After': dateInSeconds(120) }),
        1,
        done
      );
    }).timeout(3000);
  });
});
Original file line number Diff line number Diff line change
Expand Up @@ -551,38 +551,3 @@ describe('export - real http request destroyed before response received', () =>
}, 0);
});
});

describe('export - real http request destroyed after response received', () => {
  let collectorExporter: OTLPTraceExporter;
  let collectorExporterConfig: OTLPExporterNodeConfigBase;
  let spans: ReadableSpan[];

  // Local HTTP server that writes part of a response body and never ends it.
  const server = http.createServer((_request, response) => {
    response.write('writing something');
  });

  before(done => {
    server.listen(8081, done);
  });

  after(done => {
    server.close(done);
  });

  it('should log the timeout request error message', done => {
    // Point the exporter at the local server with a 300 ms timeout.
    collectorExporterConfig = {
      url: 'http://localhost:8081',
      timeoutMillis: 300,
    };
    collectorExporter = new OTLPTraceExporter(collectorExporterConfig);
    spans = [Object.assign({}, mockedReadableSpan)];

    // The export is deferred to a zero-delay timer rather than run inline.
    setTimeout(() => {
      collectorExporter.export(spans, result => {
        assert.strictEqual(result.code, core.ExportResultCode.FAILED);
        const error = result.error as OTLPExporterError;
        assert.ok(error !== undefined);
        assert.strictEqual(error.message, 'Request Timeout');
        done();
      });
    }, 0);
  });
});
15 changes: 15 additions & 0 deletions experimental/packages/exporter-trace-otlp-proto/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,21 @@ To override the default timeout duration, use the following options:

> Providing `timeoutMillis` with `collectorOptions` takes precedence and overrides timeout set with environment variables.

## OTLP Exporter Retry

OTLP requires that transient errors be handled with a [retry strategy](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/protocol/exporter.md#retry).

This retry policy uses the following configuration, which currently cannot be customized:

+ `DEFAULT_EXPORT_MAX_ATTEMPTS`: The maximum number of attempts, including the original request. Defaults to 5.
+ `DEFAULT_EXPORT_INITIAL_BACKOFF`: The initial backoff duration. Defaults to 1 second.
+ `DEFAULT_EXPORT_MAX_BACKOFF`: The maximum backoff duration. Defaults to 5 seconds.
+ `DEFAULT_EXPORT_BACKOFF_MULTIPLIER`: The backoff multiplier. Defaults to 1.5.

This retry policy first checks whether the response has a `'Retry-After'` header. If it does, the exporter waits for the time specified in that header before retrying. Otherwise, the exporter retries using exponential backoff with jitter.

> The exporter only keeps retrying until the [exporter timeout](#exporter-timeout-configuration) elapses.

## Running opentelemetry-collector locally to see the traces

1. Go to examples/otlp-exporter-node
Expand Down
124 changes: 92 additions & 32 deletions experimental/packages/otlp-exporter-base/src/platform/browser/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@
*/
import { diag } from '@opentelemetry/api';
import { OTLPExporterError } from '../../types';
import {
DEFAULT_EXPORT_MAX_ATTEMPTS,
DEFAULT_EXPORT_INITIAL_BACKOFF,
DEFAULT_EXPORT_BACKOFF_MULTIPLIER,
DEFAULT_EXPORT_MAX_BACKOFF,
isExportRetryable,
parseRetryAfterToMills,
} from '../../util';

/**
* Send metrics/spans using browser navigator.sendBeacon
Expand Down Expand Up @@ -57,47 +65,99 @@ export function sendWithXhr(
onSuccess: () => void,
onError: (error: OTLPExporterError) => void
): void {
let reqIsDestroyed: boolean;
let retryTimer: ReturnType<typeof setTimeout>;
let xhr: XMLHttpRequest;
let reqIsDestroyed = false;

const exporterTimer = setTimeout(() => {
clearTimeout(retryTimer);
reqIsDestroyed = true;
xhr.abort();

if (xhr.readyState === XMLHttpRequest.DONE) {
const err = new OTLPExporterError('Request Timeout');
onError(err);
} else {
xhr.abort();
}
}, exporterTimeout);

const xhr = new XMLHttpRequest();
xhr.open('POST', url);
const sendWithRetry = (
retries = DEFAULT_EXPORT_MAX_ATTEMPTS,
minDelay = DEFAULT_EXPORT_INITIAL_BACKOFF
) => {
xhr = new XMLHttpRequest();
xhr.open('POST', url);

const defaultHeaders = {
Accept: 'application/json',
'Content-Type': 'application/json',
};
const defaultHeaders = {
Accept: 'application/json',
'Content-Type': 'application/json',
};

Object.entries({
...defaultHeaders,
...headers,
}).forEach(([k, v]) => {
xhr.setRequestHeader(k, v);
});
Object.entries({
...defaultHeaders,
...headers,
}).forEach(([k, v]) => {
xhr.setRequestHeader(k, v);
});

xhr.send(body);
xhr.send(body);

xhr.onreadystatechange = () => {
if (xhr.readyState === XMLHttpRequest.DONE) {
if (xhr.status >= 200 && xhr.status <= 299) {
clearTimeout(exporterTimer);
diag.debug('xhr success', body);
onSuccess();
} else if (reqIsDestroyed) {
const error = new OTLPExporterError('Request Timeout', xhr.status);
onError(error);
} else {
const error = new OTLPExporterError(
`Failed to export with XHR (status: ${xhr.status})`,
xhr.status
);
clearTimeout(exporterTimer);
onError(error);
xhr.onreadystatechange = () => {
if (xhr.readyState === XMLHttpRequest.DONE && reqIsDestroyed === false) {
if (xhr.status >= 200 && xhr.status <= 299) {
diag.debug('xhr success', body);
onSuccess();
clearTimeout(exporterTimer);
clearTimeout(retryTimer);
} else if (xhr.status && isExportRetryable(xhr.status) && retries > 0) {
let retryTime: number;
minDelay = DEFAULT_EXPORT_BACKOFF_MULTIPLIER * minDelay;

// retry after interval specified in Retry-After header
if (xhr.getResponseHeader('Retry-After')) {
retryTime = parseRetryAfterToMills(
xhr.getResponseHeader('Retry-After')!
);
} else {
// exponential backoff with jitter
retryTime = Math.round(
Math.random() * (DEFAULT_EXPORT_MAX_BACKOFF - minDelay) + minDelay
);
}

retryTimer = setTimeout(() => {
sendWithRetry(retries - 1, minDelay);
}, retryTime);
} else {
const error = new OTLPExporterError(
`Failed to export with XHR (status: ${xhr.status})`,
xhr.status
);
onError(error);
clearTimeout(exporterTimer);
clearTimeout(retryTimer);
}
}
}
};

xhr.onabort = () => {
if (reqIsDestroyed) {
const err = new OTLPExporterError('Request Timeout');
onError(err);
}
clearTimeout(exporterTimer);
clearTimeout(retryTimer);
};

xhr.onerror = () => {
if (reqIsDestroyed) {
const err = new OTLPExporterError('Request Timeout');
onError(err);
}
clearTimeout(exporterTimer);
clearTimeout(retryTimer);
};
};

sendWithRetry();
}
Loading

0 comments on commit abfe059

Please sign in to comment.