Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Distinct instead of Set for map keys #5027

Merged
merged 4 commits into from
Mar 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

## [Unreleased]

### Fixed

- Clarify the documentation about equivalence guarantees for the `Set` and `Distinct` types in `go.opentelemetry.io/otel/attribute`. (#5027)

### Removed

- Drop support for [Go 1.20]. (#4967)
Expand Down
19 changes: 14 additions & 5 deletions attribute/set.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,24 @@ type (
// immutable set of attributes, with an internal cache for storing
// attribute encodings.
//
// This type supports the Equivalent method of comparison using values of
// type Distinct.
// This type will remain comparable for backwards compatibility. The
// equivalence of Sets across versions is not guaranteed to be stable.
// Prior versions may find two Sets to be equal or not when compared
// directly (i.e. ==), but subsequent versions may not. Users should use
// the Equals method to ensure stable equivalence checking.
//
// Users should also use the Distinct returned from Equivalent as a map key
// instead of a Set directly. In addition to that type providing guarantees
// on stable equivalence, it may also provide performance improvements.
Set struct {
equivalent Distinct
}

// Distinct wraps a variable-size array of KeyValue, constructed with keys
// in sorted order. This can be used as a map key or for equality checking
// between Sets.
// Distinct is a unique identifier of a Set.
//
// Distinct is designed to ensure equivalence stability: comparisons
// will return the same value across versions. For this reason, Distinct
// should always be used as a map key instead of a Set.
Distinct struct {
iface interface{}
}
Expand Down
41 changes: 34 additions & 7 deletions attribute/value_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ func TestValue(t *testing.T) {
}
}

func TestSetComparability(t *testing.T) {
func TestEquivalence(t *testing.T) {
pairs := [][2]attribute.KeyValue{
{
attribute.Bool("Bool", true),
Expand Down Expand Up @@ -139,12 +139,39 @@ func TestSetComparability(t *testing.T) {
},
}

for _, p := range pairs {
s0, s1 := attribute.NewSet(p[0]), attribute.NewSet(p[1])
m := map[attribute.Set]struct{}{s0: {}}
_, ok := m[s1]
assert.Truef(t, ok, "%s not comparable", p[0].Value.Type())
}
t.Run("Distinct", func(t *testing.T) {
for _, p := range pairs {
s0, s1 := attribute.NewSet(p[0]), attribute.NewSet(p[1])
m := map[attribute.Distinct]struct{}{s0.Equivalent(): {}}
_, ok := m[s1.Equivalent()]
assert.Truef(t, ok, "Distinct comparison of %s type: not equivalent", p[0].Value.Type())
assert.Truef(
t,
ok,
"Distinct comparison of %s type: not equivalent: %s != %s",
p[0].Value.Type(),
s0.Encoded(attribute.DefaultEncoder()),
s1.Encoded(attribute.DefaultEncoder()),
)
}
})

t.Run("Set", func(t *testing.T) {
// Maintain backwards compatibility.
for _, p := range pairs {
s0, s1 := attribute.NewSet(p[0]), attribute.NewSet(p[1])
m := map[attribute.Set]struct{}{s0: {}}
_, ok := m[s1]
assert.Truef(
t,
ok,
"Set comparison of %s type: not equivalent: %s != %s",
p[0].Value.Type(),
s0.Encoded(attribute.DefaultEncoder()),
s1.Encoded(attribute.DefaultEncoder()),
)
}
})
}

func TestAsSlice(t *testing.T) {
Expand Down
76 changes: 39 additions & 37 deletions sdk/metric/internal/aggregate/exponential_histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ const (

// expoHistogramDataPoint is a single data point in an exponential histogram.
type expoHistogramDataPoint[N int64 | float64] struct {
res exemplar.Reservoir[N]
attrs attribute.Set
res exemplar.Reservoir[N]

count uint64
min N
Expand All @@ -48,7 +49,7 @@ type expoHistogramDataPoint[N int64 | float64] struct {
zeroCount uint64
}

func newExpoHistogramDataPoint[N int64 | float64](maxSize, maxScale int, noMinMax, noSum bool) *expoHistogramDataPoint[N] {
func newExpoHistogramDataPoint[N int64 | float64](attrs attribute.Set, maxSize, maxScale int, noMinMax, noSum bool) *expoHistogramDataPoint[N] {
f := math.MaxFloat64
max := N(f) // if N is int64, max will overflow to -9223372036854775808
min := N(-f)
Expand All @@ -57,6 +58,7 @@ func newExpoHistogramDataPoint[N int64 | float64](maxSize, maxScale int, noMinMa
min = N(minInt64)
}
return &expoHistogramDataPoint[N]{
attrs: attrs,
min: max,
max: min,
maxSize: maxSize,
Expand Down Expand Up @@ -289,7 +291,7 @@ func newExponentialHistogram[N int64 | float64](maxSize, maxScale int32, noMinMa

newRes: r,
limit: newLimiter[*expoHistogramDataPoint[N]](limit),
values: make(map[attribute.Set]*expoHistogramDataPoint[N]),
values: make(map[attribute.Distinct]*expoHistogramDataPoint[N]),

start: now(),
}
Expand All @@ -305,7 +307,7 @@ type expoHistogram[N int64 | float64] struct {

newRes func() exemplar.Reservoir[N]
limit limiter[*expoHistogramDataPoint[N]]
values map[attribute.Set]*expoHistogramDataPoint[N]
values map[attribute.Distinct]*expoHistogramDataPoint[N]
valuesMu sync.Mutex

start time.Time
Expand All @@ -323,12 +325,12 @@ func (e *expoHistogram[N]) measure(ctx context.Context, value N, fltrAttr attrib
defer e.valuesMu.Unlock()

attr := e.limit.Attributes(fltrAttr, e.values)
v, ok := e.values[attr]
v, ok := e.values[attr.Equivalent()]
if !ok {
v = newExpoHistogramDataPoint[N](e.maxSize, e.maxScale, e.noMinMax, e.noSum)
v = newExpoHistogramDataPoint[N](attr, e.maxSize, e.maxScale, e.noMinMax, e.noSum)
v.res = e.newRes()

e.values[attr] = v
e.values[attr.Equivalent()] = v
}
v.record(value)
v.res.Offer(ctx, t, value, droppedAttr)
Expand All @@ -349,32 +351,32 @@ func (e *expoHistogram[N]) delta(dest *metricdata.Aggregation) int {
hDPts := reset(h.DataPoints, n, n)

var i int
for a, b := range e.values {
hDPts[i].Attributes = a
for _, val := range e.values {
hDPts[i].Attributes = val.attrs
hDPts[i].StartTime = e.start
hDPts[i].Time = t
hDPts[i].Count = b.count
hDPts[i].Scale = int32(b.scale)
hDPts[i].ZeroCount = b.zeroCount
hDPts[i].Count = val.count
hDPts[i].Scale = int32(val.scale)
hDPts[i].ZeroCount = val.zeroCount
hDPts[i].ZeroThreshold = 0.0

hDPts[i].PositiveBucket.Offset = int32(b.posBuckets.startBin)
hDPts[i].PositiveBucket.Counts = reset(hDPts[i].PositiveBucket.Counts, len(b.posBuckets.counts), len(b.posBuckets.counts))
copy(hDPts[i].PositiveBucket.Counts, b.posBuckets.counts)
hDPts[i].PositiveBucket.Offset = int32(val.posBuckets.startBin)
hDPts[i].PositiveBucket.Counts = reset(hDPts[i].PositiveBucket.Counts, len(val.posBuckets.counts), len(val.posBuckets.counts))
copy(hDPts[i].PositiveBucket.Counts, val.posBuckets.counts)

hDPts[i].NegativeBucket.Offset = int32(b.negBuckets.startBin)
hDPts[i].NegativeBucket.Counts = reset(hDPts[i].NegativeBucket.Counts, len(b.negBuckets.counts), len(b.negBuckets.counts))
copy(hDPts[i].NegativeBucket.Counts, b.negBuckets.counts)
hDPts[i].NegativeBucket.Offset = int32(val.negBuckets.startBin)
hDPts[i].NegativeBucket.Counts = reset(hDPts[i].NegativeBucket.Counts, len(val.negBuckets.counts), len(val.negBuckets.counts))
copy(hDPts[i].NegativeBucket.Counts, val.negBuckets.counts)

if !e.noSum {
hDPts[i].Sum = b.sum
hDPts[i].Sum = val.sum
}
if !e.noMinMax {
hDPts[i].Min = metricdata.NewExtrema(b.min)
hDPts[i].Max = metricdata.NewExtrema(b.max)
hDPts[i].Min = metricdata.NewExtrema(val.min)
hDPts[i].Max = metricdata.NewExtrema(val.max)
}

b.res.Collect(&hDPts[i].Exemplars)
val.res.Collect(&hDPts[i].Exemplars)

i++
}
Expand Down Expand Up @@ -402,32 +404,32 @@ func (e *expoHistogram[N]) cumulative(dest *metricdata.Aggregation) int {
hDPts := reset(h.DataPoints, n, n)

var i int
for a, b := range e.values {
hDPts[i].Attributes = a
for _, val := range e.values {
hDPts[i].Attributes = val.attrs
hDPts[i].StartTime = e.start
hDPts[i].Time = t
hDPts[i].Count = b.count
hDPts[i].Scale = int32(b.scale)
hDPts[i].ZeroCount = b.zeroCount
hDPts[i].Count = val.count
hDPts[i].Scale = int32(val.scale)
hDPts[i].ZeroCount = val.zeroCount
hDPts[i].ZeroThreshold = 0.0

hDPts[i].PositiveBucket.Offset = int32(b.posBuckets.startBin)
hDPts[i].PositiveBucket.Counts = reset(hDPts[i].PositiveBucket.Counts, len(b.posBuckets.counts), len(b.posBuckets.counts))
copy(hDPts[i].PositiveBucket.Counts, b.posBuckets.counts)
hDPts[i].PositiveBucket.Offset = int32(val.posBuckets.startBin)
hDPts[i].PositiveBucket.Counts = reset(hDPts[i].PositiveBucket.Counts, len(val.posBuckets.counts), len(val.posBuckets.counts))
copy(hDPts[i].PositiveBucket.Counts, val.posBuckets.counts)

hDPts[i].NegativeBucket.Offset = int32(b.negBuckets.startBin)
hDPts[i].NegativeBucket.Counts = reset(hDPts[i].NegativeBucket.Counts, len(b.negBuckets.counts), len(b.negBuckets.counts))
copy(hDPts[i].NegativeBucket.Counts, b.negBuckets.counts)
hDPts[i].NegativeBucket.Offset = int32(val.negBuckets.startBin)
hDPts[i].NegativeBucket.Counts = reset(hDPts[i].NegativeBucket.Counts, len(val.negBuckets.counts), len(val.negBuckets.counts))
copy(hDPts[i].NegativeBucket.Counts, val.negBuckets.counts)

if !e.noSum {
hDPts[i].Sum = b.sum
hDPts[i].Sum = val.sum
}
if !e.noMinMax {
hDPts[i].Min = metricdata.NewExtrema(b.min)
hDPts[i].Max = metricdata.NewExtrema(b.max)
hDPts[i].Min = metricdata.NewExtrema(val.min)
hDPts[i].Max = metricdata.NewExtrema(val.max)
}

b.res.Collect(&hDPts[i].Exemplars)
val.res.Collect(&hDPts[i].Exemplars)

i++
// TODO (#3006): This will use an unbounded amount of memory if there
Expand Down
25 changes: 13 additions & 12 deletions sdk/metric/internal/aggregate/exponential_histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ func testExpoHistogramDataPointRecord[N int64 | float64](t *testing.T) {
restore := withHandler(t)
defer restore()

dp := newExpoHistogramDataPoint[N](tt.maxSize, 20, false, false)
dp := newExpoHistogramDataPoint[N](alice, tt.maxSize, 20, false, false)
for _, v := range tt.values {
dp.record(v)
dp.record(-v)
Expand Down Expand Up @@ -176,7 +176,7 @@ func testExpoHistogramMinMaxSumInt64(t *testing.T) {
for _, v := range tt.values {
h.measure(context.Background(), v, alice, nil)
}
dp := h.values[alice]
dp := h.values[alice.Equivalent()]

assert.Equal(t, tt.expected.max, dp.max)
assert.Equal(t, tt.expected.min, dp.min)
Expand Down Expand Up @@ -218,7 +218,7 @@ func testExpoHistogramMinMaxSumFloat64(t *testing.T) {
for _, v := range tt.values {
h.measure(context.Background(), v, alice, nil)
}
dp := h.values[alice]
dp := h.values[alice.Equivalent()]

assert.Equal(t, tt.expected.max, dp.max)
assert.Equal(t, tt.expected.min, dp.min)
Expand Down Expand Up @@ -305,7 +305,7 @@ func testExpoHistogramDataPointRecordFloat64(t *testing.T) {
restore := withHandler(t)
defer restore()

dp := newExpoHistogramDataPoint[float64](tt.maxSize, 20, false, false)
dp := newExpoHistogramDataPoint[float64](alice, tt.maxSize, 20, false, false)
for _, v := range tt.values {
dp.record(v)
dp.record(-v)
Expand All @@ -322,21 +322,21 @@ func TestExponentialHistogramDataPointRecordLimits(t *testing.T) {
// These bins are calculated from the following formula:
// floor( log2( value) * 2^20 ) using an arbitrary precision calculator.

fdp := newExpoHistogramDataPoint[float64](4, 20, false, false)
fdp := newExpoHistogramDataPoint[float64](alice, 4, 20, false, false)
fdp.record(math.MaxFloat64)

if fdp.posBuckets.startBin != 1073741823 {
t.Errorf("Expected startBin to be 1073741823, got %d", fdp.posBuckets.startBin)
}

fdp = newExpoHistogramDataPoint[float64](4, 20, false, false)
fdp = newExpoHistogramDataPoint[float64](alice, 4, 20, false, false)
fdp.record(math.SmallestNonzeroFloat64)

if fdp.posBuckets.startBin != -1126170625 {
t.Errorf("Expected startBin to be -1126170625, got %d", fdp.posBuckets.startBin)
}

idp := newExpoHistogramDataPoint[int64](4, 20, false, false)
idp := newExpoHistogramDataPoint[int64](alice, 4, 20, false, false)
idp.record(math.MaxInt64)

if idp.posBuckets.startBin != 66060287 {
Expand Down Expand Up @@ -641,7 +641,7 @@ func TestScaleChange(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
p := newExpoHistogramDataPoint[float64](tt.args.maxSize, 20, false, false)
p := newExpoHistogramDataPoint[float64](alice, tt.args.maxSize, 20, false, false)
got := p.scaleChange(tt.args.bin, tt.args.startBin, tt.args.length)
if got != tt.want {
t.Errorf("scaleChange() = %v, want %v", got, tt.want)
Expand All @@ -652,7 +652,7 @@ func TestScaleChange(t *testing.T) {

func BenchmarkPrepend(b *testing.B) {
for i := 0; i < b.N; i++ {
agg := newExpoHistogramDataPoint[float64](1024, 20, false, false)
agg := newExpoHistogramDataPoint[float64](alice, 1024, 20, false, false)
n := math.MaxFloat64
for j := 0; j < 1024; j++ {
agg.record(n)
Expand All @@ -663,7 +663,7 @@ func BenchmarkPrepend(b *testing.B) {

func BenchmarkAppend(b *testing.B) {
for i := 0; i < b.N; i++ {
agg := newExpoHistogramDataPoint[float64](1024, 20, false, false)
agg := newExpoHistogramDataPoint[float64](alice, 1024, 20, false, false)
n := smallestNonZeroNormalFloat64
for j := 0; j < 1024; j++ {
agg.record(n)
Expand Down Expand Up @@ -704,6 +704,7 @@ func BenchmarkExponentialHistogram(b *testing.B) {

func TestSubNormal(t *testing.T) {
want := &expoHistogramDataPoint[float64]{
attrs: alice,
maxSize: 4,
count: 3,
min: math.SmallestNonzeroFloat64,
Expand All @@ -717,7 +718,7 @@ func TestSubNormal(t *testing.T) {
},
}

ehdp := newExpoHistogramDataPoint[float64](4, 20, false, false)
ehdp := newExpoHistogramDataPoint[float64](alice, 4, 20, false, false)
ehdp.record(math.SmallestNonzeroFloat64)
ehdp.record(math.SmallestNonzeroFloat64)
ehdp.record(math.SmallestNonzeroFloat64)
Expand Down Expand Up @@ -1060,7 +1061,7 @@ func FuzzGetBin(f *testing.F) {
t.Skip("skipping test for zero")
}

p := newExpoHistogramDataPoint[float64](4, 20, false, false)
p := newExpoHistogramDataPoint[float64](alice, 4, 20, false, false)
// scale range is -10 to 20.
p.scale = (scale%31+31)%31 - 10
got := p.getBin(v)
Expand Down
Loading