From ed81e4f6669b761d0ba28167556c6e53bce6514d Mon Sep 17 00:00:00 2001 From: Philipp Hofmann Date: Wed, 10 Apr 2024 12:23:43 +0200 Subject: [PATCH] fix(metrics): Data Normalization (#3843) Update metrics data normalization to spec. Fixes GH-3829 --- CHANGELOG.md | 1 + Sources/Swift/Metrics/EncodeMetrics.swift | 33 ++++++++++++++----- .../Swift/Metrics/EncodeMetricTests.swift | 18 +++++++--- 3 files changed, 39 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8a7b9cf5335..a84e2bc090b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Add timing API for Metrics (#3812): - Add [rate limiting](https://develop.sentry.dev/sdk/rate-limiting/) for Metrics (#3838) +- Data normalization for Metrics (#3843) ## 8.23.0 diff --git a/Sources/Swift/Metrics/EncodeMetrics.swift b/Sources/Swift/Metrics/EncodeMetrics.swift index c66fc335c69..83b9762c115 100644 --- a/Sources/Swift/Metrics/EncodeMetrics.swift +++ b/Sources/Swift/Metrics/EncodeMetrics.swift @@ -10,10 +10,10 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data { let buckets = bucket.value for metric in buckets { - statsdString.append(sanitize(key: metric.key)) + statsdString.append(sanitize(metricKey: metric.key)) statsdString.append("@") - statsdString.append(metric.unit.unit) + statsdString.append(sanitize(metricUnit: metric.unit.unit)) for serializedValue in metric.serialize() { statsdString.append(":\(serializedValue)") @@ -24,7 +24,7 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data { var firstTag = true for (tagKey, tagValue) in metric.tags { - let sanitizedTagKey = sanitize(key: tagKey) + let sanitizedTagKey = sanitize(tagKey: tagKey) if firstTag { statsdString.append("|#") @@ -34,7 +34,7 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data { } statsdString.append("\(sanitizedTagKey):") - statsdString.append(sanitize(value: tagValue)) + statsdString.append(replaceTagValueCharacters(tagValue: tagValue)) } statsdString.append("|T") @@ -46,10 +46,27 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data { return statsdString.data(using: .utf8) ?? Data() } -private func sanitize(key: String) -> String { - return key.replacingOccurrences(of: "[^a-zA-Z0-9_/.-]+", with: "_", options: .regularExpression) +private func sanitize(metricUnit: String) -> String { + // We can't use \w because it includes chars like ä on Swift + return metricUnit.replacingOccurrences(of: "[^a-zA-Z0-9_]", with: "", options: .regularExpression) } -private func sanitize(value: String) -> String { - return value.replacingOccurrences(of: "[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+", with: "", options: .regularExpression) +private func sanitize(metricKey: String) -> String { + // We can't use \w because it includes chars like ä on Swift + return metricKey.replacingOccurrences(of: "[^a-zA-Z0-9_.-]+", with: "_", options: .regularExpression) +} + +private func sanitize(tagKey: String) -> String { + // We can't use \w because it includes chars like ä on Swift + return tagKey.replacingOccurrences(of: "[^a-zA-Z0-9_/.-]+", with: "", options: .regularExpression) +} + +private func replaceTagValueCharacters(tagValue: String) -> String { + var result = tagValue.replacingOccurrences(of: "\\", with: #"\\\\"#) + result = result.replacingOccurrences(of: "\n", with: #"\\n"#) + result = result.replacingOccurrences(of: "\r", with: #"\\r"#) + result = result.replacingOccurrences(of: "\t", with: #"\\t"#) + result = result.replacingOccurrences(of: "|", with: #"\\u{7c}"#) + return result.replacingOccurrences(of: ",", with: #"\\u{2c}"#) + } diff --git a/Tests/SentryTests/Swift/Metrics/EncodeMetricTests.swift b/Tests/SentryTests/Swift/Metrics/EncodeMetricTests.swift index ae8e26de535..825c0ea576e 100644 --- a/Tests/SentryTests/Swift/Metrics/EncodeMetricTests.swift +++ b/Tests/SentryTests/Swift/Metrics/EncodeMetricTests.swift @@ -83,23 +83,31 @@ final class EncodeMetricTests: XCTestCase { let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]]) - expect(data.decodeStatsd()) == "abyzABYZ09_/.-_a_a@second:10.1|c|T10234\n" + expect(data.decodeStatsd()) == "abyzABYZ09__.-_a_a@second:10.1|c|T10234\n" } func testEncodeCounterMetricWithTagKeyToSanitize() { - let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["abyzABYZ09_/.-!@a#$Äa": "value"]) + let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["abcABC123_-./äöü$%&abcABC123": "value"]) let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]]) - expect(data.decodeStatsd()) == "app.start@second:10.1|c|#abyzABYZ09_/.-_a_a:value|T10234\n" + expect(data.decodeStatsd()) == "app.start@second:10.1|c|#abcABC123_-./abcABC123:value|T10234\n" } func testEncodeCounterMetricWithTagValueToSanitize() { - let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["key": #"azAZ1 _:/@.{}[]$\%^&a*"#]) + let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["key": "abc\n\r\t|,\\123"]) let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]]) - expect(data.decodeStatsd()) == "app.start@second:10.1|c|#key:azAZ1 _:/@.{}[]$a|T10234\n" + expect(data.decodeStatsd()).to(contain(#"abc\\n\\r\\t\\u{7c}\\u{2c}\\\\123"#)) + } + + func testEncodeCounterMetricWithUnitToSanitize() { + let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnit(unit: "abyzABYZ09_/.ä"), tags: [:]) + + let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]]) + + expect(data.decodeStatsd()) == "app.start@abyzABYZ09_:10.1|c|T10234\n" } }