Skip to content

Commit

Permalink
ui: update changefeed metrics page
Browse files Browse the repository at this point in the history
This PR does the following changes:
- Added a `scale` parameter to `Metrics` so that I could support a duration
  metric that's being emitted in Seconds rather than Nanoseconds.
- Added support for minutes and hours in Duration graphs
- There is now a "Changefeed Status" graph to show counts of
  Running/Paused/Failed
- There is now a "Commit Latency" graph to show P50, P90, and P99 commit
  latencies
- Sink Byte Traffic is now Emitted Bytes
- Sink Timings has been removed because I don't believe either of the metrics
  exists anymore
- Max Changefeed Latency is now Max Checkpoint Latency
- There is now a Backfill Pending Ranges graph
- There is now an Oldest Protected Timestamp graph
- There is now a Schema Registry Registrations graph

Release note (ui change): The metrics page for changefeeds has been updated
with new graphs to track backfill progress, protected timestamp age, and
the number of schema registry registrations.
  • Loading branch information
samiskin committed Apr 25, 2023
1 parent 352ec11 commit f018344
Show file tree
Hide file tree
Showing 9 changed files with 177 additions and 63 deletions.
2 changes: 1 addition & 1 deletion pkg/ccl/changefeedccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,7 @@ var (
// for now.
metaChangefeedMaxBehindNanos = metric.Metadata{
Name: "changefeed.max_behind_nanos",
Help: "Largest commit-to-emit duration of any running feed",
Help: "Time since the changefeed last checkpointed its progress",
Measurement: "Nanoseconds",
Unit: metric.Unit_NANOSECONDS,
}
Expand Down
49 changes: 49 additions & 0 deletions pkg/ui/workspaces/cluster-ui/src/graphs/utils/domain.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// Copyright 2023 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

import { ComputeDurationAxisDomain } from "./domain";

describe("Domain utils", () => {
  describe("ComputeDurationAxisDomain", () => {
    it("should correctly format the label and guide", () => {
      // One sample per duration unit (plus a large hour value): the raw
      // nanosecond input and the value/unit pair expected after formatting.
      const samples = [
        { ns: 5, value: "5.00", unit: "ns" },
        { ns: 60_000, value: "60.00", unit: "µs" },
        { ns: 7_000_000, value: "7.00", unit: "ms" },
        { ns: 40_240_000_000, value: "40.24", unit: "s" },
        { ns: 100_000_000_500, value: "1.67", unit: "min" },
        { ns: 4_000_000_000_000, value: "1.11", unit: "hr" },
        { ns: 600_000_000_000_000, value: "166.67", unit: "hr" },
      ];

      samples.forEach(upperBound => {
        const domain = ComputeDurationAxisDomain([0, upperBound.ns]);

        // The axis label is expected to be the unit of the upper extent.
        expect(domain).toHaveProperty("label", upperBound.unit);
        expect(domain).toHaveProperty("guideFormat");

        // The lower extent is 0 here, so an empty guide value is expected
        // to be rendered in nanoseconds.
        expect(domain.guideFormat(undefined)).toEqual(`0.00 ns`);

        // Every guide value is expected in its own best-fitting unit,
        // whatever unit the axis label uses.
        samples.forEach(sample => {
          expect(domain.guideFormat(sample.ns)).toEqual(
            `${sample.value} ${sample.unit}`,
          );
        });
      });
    });

    it("should use the units of the lowest extent if given undefined or 0", () => {
      // Lower extent of 2000ns is in the µs range, so empty values use µs.
      const domain = ComputeDurationAxisDomain([2000, 2_500_000]);
      for (const emptyValue of [undefined, 0]) {
        expect(domain.guideFormat(emptyValue)).toEqual(`0.00 µs`);
      }
    });
  });
});
18 changes: 11 additions & 7 deletions pkg/ui/workspaces/cluster-ui/src/graphs/utils/domain.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ import {
ComputeByteScale,
ComputeDurationScale,
DATE_WITH_SECONDS_FORMAT_24_TZ,
DurationFitScale,
FormatWithTimezone,
} from "src/util/format";

Expand Down Expand Up @@ -223,15 +222,20 @@ export function ComputeByteAxisDomain(extent: Extent): AxisDomain {
return axisDomain;
}

// NOTE(review): this hunk is rendered without +/- markers, so pre-change and
// post-change lines are interleaved. Going by the commit summary for this file
// (11 additions, 7 deletions), the lines tagged "old" below appear to be the
// removed ones and the untagged lines the added/context ones — confirm against
// the raw diff.
//
// ComputeDurationAxisDomain builds the AxisDomain for a duration axis whose
// extent is expressed in nanoseconds.
// old: non-exported signature; a single scale was derived from the upper extent.
function ComputeDurationAxisDomain(extent: Extent): AxisDomain {
const scale = ComputeDurationScale(extent[1]);
const prefixFactor = scale.value;
// new: exported (the spec file imports it from "./domain"); a scale is computed
// for each bound of the extent.
export function ComputeDurationAxisDomain(extent: Extent): AxisDomain {
const extentScales = extent.map(e => ComputeDurationScale(e));

// old:
const axisDomain = computeAxisDomain(extent, prefixFactor);
// new: the scale of the upper bound drives both the axis domain and its label.
const axisDomain = computeAxisDomain(extent, extentScales[1].value);
axisDomain.label = extentScales[1].units;

// old:
axisDomain.label = scale.units;
// new: guide values are formatted in whichever unit ComputeDurationScale picks
// for that individual value, with two decimals.
axisDomain.guideFormat = (nanoseconds: number) => {
// Falsy input (0, undefined, NaN) is shown as zero in the lower bound's units.
if (!nanoseconds) {
return `0.00 ${extentScales[0].units}`;
}
const scale = ComputeDurationScale(nanoseconds);
return `${(nanoseconds / scale.value).toFixed(2)} ${scale.units}`;
};

// old: every guide value was formatted in the single unit of the upper extent.
axisDomain.guideFormat = DurationFitScale(scale.units);
return axisDomain;
}

Expand Down
15 changes: 0 additions & 15 deletions pkg/ui/workspaces/cluster-ui/src/util/format.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@

import { assert } from "chai";
import {
DurationFitScale,
durationUnits,
BytesFitScale,
byteUnits,
HexStringToInt64String,
Expand All @@ -20,19 +18,6 @@ import {
} from "./format";

describe("Format utils", () => {
describe("DurationFitScale", () => {
it("converts nanoseconds to provided units", () => {
// test zero values
assert.equal(DurationFitScale(durationUnits[0])(undefined), "0.00 ns");
assert.equal(DurationFitScale(durationUnits[0])(0), "0.00 ns");
// "ns", "µs", "ms", "s"
assert.equal(DurationFitScale(durationUnits[0])(32), "32.00 ns");
assert.equal(DurationFitScale(durationUnits[1])(32120), "32.12 µs");
assert.equal(DurationFitScale(durationUnits[2])(32122300), "32.12 ms");
assert.equal(DurationFitScale(durationUnits[3])(32122343000), "32.12 s");
});
});

describe("BytesFitScale", () => {
it("converts bytes to provided units", () => {
// test zero values
Expand Down
35 changes: 14 additions & 21 deletions pkg/ui/workspaces/cluster-ui/src/util/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,16 @@ export const byteUnits: string[] = [
"ZiB",
"YiB",
];
export const durationUnits: string[] = ["ns", "µs", "ms", "s"];

/**
 * Duration units paired with their size in nanoseconds, ordered from the
 * largest unit to the smallest.
 */
export const durationUnitsDescending = [
  { value: 3.6e12, units: "hr" }, // 60 * 60 seconds
  { value: 6e10, units: "min" }, // 60 seconds
  { value: 1e9, units: "s" },
  { value: 1e6, units: "ms" },
  { value: 1e3, units: "µs" },
  { value: 1, units: "ns" },
];

export const countUnits: string[] = ["", "k", "m", "b"];

interface UnitValue {
Expand Down Expand Up @@ -138,12 +147,10 @@ export function Percentage(
* ComputeDurationScale calculates an appropriate scale factor and unit to use
* to display a given duration value, without actually converting the value.
*/
/**
 * Picks a display scale for a nanosecond duration: asks ComputePrefixExponent
 * for an index into durationUnits (using a step of 1000 between units) and
 * returns that unit together with the matching power of 1000. The input value
 * itself is not converted.
 */
export function ComputeDurationScale(nanoseconds: number): UnitValue {
  const unitIndex = ComputePrefixExponent(nanoseconds, 1000, durationUnits);
  const units = durationUnits[unitIndex];
  const value = Math.pow(1000, unitIndex);
  return { value, units };
}
/**
 * ComputeDurationScale picks the unit in which a nanosecond duration should be
 * displayed, without converting the value itself.
 *
 * @param ns - duration in nanoseconds; may be negative.
 * @returns the first entry of durationUnitsDescending whose size does not
 * exceed the magnitude of `ns`. Values smaller than one nanosecond, zero and
 * NaN/undefined fall back to the smallest unit (the last entry, "ns").
 */
export function ComputeDurationScale(ns: number): UnitValue {
  // Select by magnitude so that a negative duration is scaled like its
  // positive counterpart instead of always dropping down to nanoseconds.
  const magnitude = Math.abs(ns);
  const match = durationUnitsDescending.find(
    ({ value }) => magnitude >= value,
  );
  // `find` yields undefined when nothing fits (|ns| < 1 or NaN); make the
  // fallback explicit so the function never returns undefined.
  return match ?? durationUnitsDescending[durationUnitsDescending.length - 1];
}

/**
Expand All @@ -170,20 +177,6 @@ export function DurationCheckSample(nanoseconds: number): string {
return Duration(nanoseconds);
}

/**
 * DurationFitScale returns a formatter bound to one duration unit. The
 * formatter divides a nanosecond value by 1000 raised to the unit's position
 * in durationUnits and prints it with two decimals followed by the unit.
 * Falsy input (0, undefined, NaN) is printed as "0.00 <unit>".
 */
// tslint:disable-next-line: variable-name
export const DurationFitScale =
  (scale: string) =>
  (nanoseconds: number): string => {
    const exponent = durationUnits.indexOf(scale);
    // Falsy input short-circuits to zero so that no division is attempted.
    const scaled = nanoseconds ? nanoseconds / Math.pow(1000, exponent) : 0;
    return `${scaled.toFixed(2)} ${scale}`;
  };

export const DATE_FORMAT = "MMM DD, YYYY [at] H:mm";
export const DATE_WITH_SECONDS_AND_MILLISECONDS_FORMAT =
"MMM DD, YYYY [at] H:mm:ss:ms";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,8 +293,8 @@ export class InternalLineGraph extends React.Component<LineGraphProps, {}> {
// and are called when recomputing certain axis and
// series options. This lets us use updated domains
// when redrawing the uPlot chart on data change.
const resultDatapoints = _.flatMap(data.results, result =>
result.datapoints.map(dp => dp.value),
const resultDatapoints = _.flatMap(fData, result =>
result.values.map(dp => dp.value),
);
this.yAxisDomain = calculateYAxisDomain(axis.props.units, resultDatapoints);
this.xAxisDomain = calculateXAxisDomain(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,25 +21,49 @@ export default function (props: GraphDashboardProps) {

return [
<LineGraph
title="Max Changefeed Latency"
title="Changefeed Status"
isKvGraph={false}
sources={storeSources}
>
<Axis units={AxisUnits.Duration} label="time">
<Axis units={AxisUnits.Count} label="count">
<Metric
name="cr.node.changefeed.max_behind_nanos"
title="Max Changefeed Latency"
downsampleMax
aggregateMax
name="cr.node.jobs.changefeed.currently_running"
title="Running"
/>
<Metric
name="cr.node.jobs.changefeed.currently_paused"
title="Paused"
/>
<Metric name="cr.node.jobs.changefeed.resume_failed" title="Failed" />
</Axis>
</LineGraph>,

<LineGraph
title="Sink Byte Traffic"
title="Commit Latency"
tooltip={`The difference between an event's MVCC timestamp and the time it was acknowledged as received by the downstream sink.`}
isKvGraph={false}
sources={storeSources}
>
<Axis units={AxisUnits.Duration} label="latency">
<Metric
name="cr.node.changefeed.commit_latency-p99"
title="99th Percentile"
downsampleMax
/>
<Metric
name="cr.node.changefeed.commit_latency-p90"
title="90th Percentile"
downsampleMax
/>
<Metric
name="cr.node.changefeed.commit_latency-p50"
title="50th Percentile"
downsampleMax
/>
</Axis>
</LineGraph>,

<LineGraph title="Emitted Bytes" isKvGraph={false} sources={storeSources}>
<Axis units={AxisUnits.Bytes} label="bytes">
<Metric
name="cr.node.changefeed.emitted_bytes"
Expand All @@ -64,23 +88,25 @@ export default function (props: GraphDashboardProps) {
</Axis>
</LineGraph>,

<LineGraph title="Sink Timings" isKvGraph={false} sources={storeSources}>
<LineGraph
title="Max Checkpoint Latency"
isKvGraph={false}
tooltip={`The most any changefeed's persisted checkpoint is behind the present. Larger values indicate issues with successfully ingesting or emitting changes. If errors cause a changefeed to restart, or the changefeed is paused and unpaused, emitted data up to the last checkpoint may be re-emitted.`}
sources={storeSources}
>
<Axis units={AxisUnits.Duration} label="time">
<Metric
name="cr.node.changefeed.emit_nanos"
title="Message Emit Time"
nonNegativeRate
/>
<Metric
name="cr.node.changefeed.flush_nanos"
title="Flush Time"
nonNegativeRate
name="cr.node.changefeed.max_behind_nanos"
title="Max Checkpoint Latency"
downsampleMax
aggregateMax
/>
</Axis>
</LineGraph>,

<LineGraph
title="Changefeed Restarts"
tooltip={`The rate of transient non-fatal errors, such as temporary connectivity issues or a rolling upgrade. This rate constantly becoming non-zero may indicate a more persistent issue.`}
isKvGraph={false}
sources={storeSources}
>
Expand All @@ -92,5 +118,51 @@ export default function (props: GraphDashboardProps) {
/>
</Axis>
</LineGraph>,

<LineGraph
title="Oldest Protected Timestamp"
tooltip={`The oldest data that any changefeed is protecting from being able to be automatically garbage collected.`}
isKvGraph={false}
sources={storeSources}
>
<Axis units={AxisUnits.Duration} label="time">
<Metric
name="cr.node.jobs.changefeed.protected_age_sec"
title="Protected Timestamp Age"
scale={1_000_000_000}
downsampleMax
/>
</Axis>
</LineGraph>,

<LineGraph
title="Backfill Pending Ranges"
tooltip={`The number of ranges being backfilled (ex: due to an initial scan or schema change) that are yet to completely enter the Changefeed pipeline.`}
isKvGraph={false}
sources={storeSources}
>
<Axis units={AxisUnits.Count} label="count">
<Metric
name="cr.node.changefeed.backfill_pending_ranges"
title="Backfill Pending Ranges"
nonNegativeRate
/>
</Axis>
</LineGraph>,

<LineGraph
title="Schema Registry Registrations"
tooltip={`The rate of schema registration requests made by CockroachDB nodes to a configured schema registry endpoint (ex: A Kafka feed pointing to a Confluent Schema Registry)`}
isKvGraph={false}
sources={storeSources}
>
<Axis units={AxisUnits.Count} label="action">
<Metric
name="cr.node.changefeed.schema_registry_registrations"
title="Schema Registry Registrations"
nonNegativeRate
/>
</Axis>
</LineGraph>,
];
}
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,13 @@ export function formatMetricData(
_.each(metrics, (s, idx) => {
const result = data.results[idx];
if (result && !_.isEmpty(result.datapoints)) {
const scaledValues = result.datapoints.map(v => ({
...v,
value: v.value && v.value * (s.props.scale ?? 1),
}));

formattedData.push({
values: result.datapoints,
values: scaledValues,
key: s.props.title || s.props.name,
area: true,
fillOpacity: 0.1,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,12 @@ export interface MetricProps {
sources?: string[];
title?: string;
rate?: boolean;

// How much to multiply the value of the underlying metric, for example if the
// metric was a duration stored in seconds you'd need a scale of 1_000_000_000
// to convert it to our Duration format which assumes Nanoseconds.
scale?: number;

nonNegativeRate?: boolean;
aggregateMax?: boolean;
aggregateMin?: boolean;
Expand Down

0 comments on commit f018344

Please sign in to comment.