Skip to content

Commit

Permalink
fix(metrics): Data Normalization (#3843)
Browse files Browse the repository at this point in the history
Update metrics data normalization to spec.

Fixes GH-3829
  • Loading branch information
philipphofmann authored Apr 10, 2024
1 parent c0f08e7 commit e8114e4
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 13 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

- Add timing API for Metrics (#3812):
- Add [rate limiting](https://develop.sentry.dev/sdk/rate-limiting/) for Metrics (#3838)
- Data normalization for Metrics (#3843)

## 8.23.0

Expand Down
33 changes: 25 additions & 8 deletions Sources/Swift/Metrics/EncodeMetrics.swift
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
let buckets = bucket.value
for metric in buckets {

statsdString.append(sanitize(key: metric.key))
statsdString.append(sanitize(metricKey: metric.key))
statsdString.append("@")

statsdString.append(metric.unit.unit)
statsdString.append(sanitize(metricUnit: metric.unit.unit))

for serializedValue in metric.serialize() {
statsdString.append(":\(serializedValue)")
Expand All @@ -24,7 +24,7 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {

var firstTag = true
for (tagKey, tagValue) in metric.tags {
let sanitizedTagKey = sanitize(key: tagKey)
let sanitizedTagKey = sanitize(tagKey: tagKey)

if firstTag {
statsdString.append("|#")
Expand All @@ -34,7 +34,7 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
}

statsdString.append("\(sanitizedTagKey):")
statsdString.append(sanitize(value: tagValue))
statsdString.append(replaceTagValueCharacters(tagValue: tagValue))
}

statsdString.append("|T")
Expand All @@ -46,10 +46,27 @@ func encodeToStatsd(flushableBuckets: [BucketTimestamp: [Metric]]) -> Data {
return statsdString.data(using: .utf8) ?? Data()
}

private func sanitize(key: String) -> String {
return key.replacingOccurrences(of: "[^a-zA-Z0-9_/.-]+", with: "_", options: .regularExpression)
private func sanitize(metricUnit: String) -> String {
// We can't use \w because it includes chars like ä on Swift
return metricUnit.replacingOccurrences(of: "[^a-zA-Z0-9_]", with: "", options: .regularExpression)
}

private func sanitize(value: String) -> String {
return value.replacingOccurrences(of: "[^\\w\\d\\s_:/@\\.\\{\\}\\[\\]$-]+", with: "", options: .regularExpression)
private func sanitize(metricKey: String) -> String {
// We can't use \w because it includes chars like ä on Swift
return metricKey.replacingOccurrences(of: "[^a-zA-Z0-9_.-]+", with: "_", options: .regularExpression)
}

private func sanitize(tagKey: String) -> String {
// We can't use \w because it includes chars like ä on Swift
return tagKey.replacingOccurrences(of: "[^a-zA-Z0-9_/.-]+", with: "", options: .regularExpression)
}

private func replaceTagValueCharacters(tagValue: String) -> String {
var result = tagValue.replacingOccurrences(of: "\\", with: #"\\\\"#)
result = result.replacingOccurrences(of: "\n", with: #"\\n"#)
result = result.replacingOccurrences(of: "\r", with: #"\\r"#)
result = result.replacingOccurrences(of: "\t", with: #"\\t"#)
result = result.replacingOccurrences(of: "|", with: #"\\u{7c}"#)
return result.replacingOccurrences(of: ",", with: #"\\u{2c}"#)

}
18 changes: 13 additions & 5 deletions Tests/SentryTests/Swift/Metrics/EncodeMetricTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -83,23 +83,31 @@ final class EncodeMetricTests: XCTestCase {

let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])

expect(data.decodeStatsd()) == "abyzABYZ09_/.-_a_a@second:10.1|c|T10234\n"
expect(data.decodeStatsd()) == "abyzABYZ09__.-_a_a@second:10.1|c|T10234\n"
}

func testEncodeCounterMetricWithTagKeyToSanitize() {
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["abyzABYZ09_/.-!@a#$Äa": "value"])
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["abcABC123_-./äöü$%&abcABC123": "value"])

let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])

expect(data.decodeStatsd()) == "app.start@second:10.1|c|#abyzABYZ09_/.-_a_a:value|T10234\n"
expect(data.decodeStatsd()) == "app.start@second:10.1|c|#abcABC123_-./abcABC123:value|T10234\n"
}

func testEncodeCounterMetricWithTagValueToSanitize() {
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["key": #"azAZ1 _:/@.{}[]$\%^&a*"#])
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnitDuration.second, tags: ["key": "abc\n\r\t|,\\123"])

let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])

expect(data.decodeStatsd()) == "app.start@second:10.1|c|#key:azAZ1 _:/@.{}[]$a|T10234\n"
expect(data.decodeStatsd()).to(contain(#"abc\\n\\r\\t\\u{7c}\\u{2c}\\\\123"#))
}

func testEncodeCounterMetricWithUnitToSanitize() {
let counterMetric = CounterMetric(first: 10.1, key: "app.start", unit: MeasurementUnit(unit: "abyzABYZ09_/.ä"), tags: [:])

let data = encodeToStatsd(flushableBuckets: [10_234: [counterMetric]])

expect(data.decodeStatsd()) == "app.start@abyzABYZ09_:10.1|c|T10234\n"
}
}

Expand Down

0 comments on commit e8114e4

Please sign in to comment.