open-telemetry · jpkrohling · May 15, 2024 · May 12, 2023 · May 15, 2023 · May 16, 2023
@@ -0,0 +1,27 @@
+# Use this changelog template to create an entry for release notes.
+
+# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
+change_type: enhancement
+
+# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
+component: probabilisticsamplerprocessor
+
+# A brief description of the change.  Surround your text with quotes ("") if it needs to start with a backtick (`).
+note: Adds the `FailClosed` flag to solidify current behavior when randomness source is missing.
+
+# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
+issues: [31918]
+
+# (Optional) One or more lines of additional information to render under the primary note.
+# These lines will be padded with 2 spaces and then inserted directly into the document.
+# Use pipe (|) for multiline entries.
+subtext:
+
+# If your change doesn't affect end users or the exported elements of any package,
+# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
+# Optional: The change log or logs in which this entry should be included.
+# e.g. '[user]' or '[user, api]'
+# Include 'user' if the change is relevant to end users.
+# Include 'api' if there is a change to a library API.
+# Default: '[user]'
+change_logs: [user]
diff --git a/Makefile.Common b/Makefile.Common
@@ -26,7 +26,7 @@ SRC_PARENT_DIR :=  $(shell dirname $(SRC_ROOT))
 # build tags required by any component should be defined as an independent variables and later added to GO_BUILD_TAGS below
 GO_BUILD_TAGS=""
 GOTEST_TIMEOUT?= 600s
-GOTEST_OPT?= -race -timeout $(GOTEST_TIMEOUT) -parallel 4 --tags=$(GO_BUILD_TAGS)
+GOTEST_OPT?= -race -timeout $(GOTEST_TIMEOUT) -parallel 4 --tags=$(GO_BUILD_TAGS) -v
 GOTEST_INTEGRATION_OPT?= -race -timeout 360s -parallel 4
 GOTEST_OPT_WITH_COVERAGE = $(GOTEST_OPT) -coverprofile=coverage.txt -covermode=atomic
 GOTEST_OPT_WITH_INTEGRATION=$(GOTEST_INTEGRATION_OPT) -tags=integration,$(GO_BUILD_TAGS)

@@ -511,6 +511,7 @@ require (
 	github.com/open-telemetry/opentelemetry-collector-contrib/internal/kafka v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/internal/sharedcomponent v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/experimentalmetricmetadata v0.99.0 // indirect
+	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/azure v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/winperfcounters v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/processor/probabilisticsamplerprocessor v0.99.0 // indirect
@@ -1159,3 +1160,5 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/acke
 replace github.com/open-telemetry/opentelemetry-collector-contrib/connector/grafanacloudconnector => ../../connector/grafanacloudconnector
 
 replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/sumologicextension => ../../extension/sumologicextension
+
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling
@@ -456,3 +456,4 @@ replaces:
   - github.com/open-telemetry/opentelemetry-collector-contrib/internal/sqlquery => ../../internal/sqlquery
   - github.com/open-telemetry/opentelemetry-collector-contrib/extension/ackextension => ../../extension/ackextension
   - github.com/open-telemetry/opentelemetry-collector-contrib/extension/googleclientauthextension => ../../extension/googleclientauthextension
+  - github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling
@@ -570,6 +570,7 @@ require (
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.99.0 // indirect
+	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/azure v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/jaeger v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/loki v0.99.0 // indirect
@@ -1213,3 +1214,5 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/internal/sqlqu
 replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/ackextension => ../../extension/ackextension
 
 replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/googleclientauthextension => ../../extension/googleclientauthextension
+
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling
@@ -269,3 +269,5 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/stor
 replace github.com/openshift/api v3.9.0+incompatible => github.com/openshift/api v0.0.0-20180801171038-322a19404e37
 
 replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor => ../../processor/transformprocessor
+
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling
@@ -198,6 +198,7 @@ require (
 	github.com/open-telemetry/opentelemetry-collector-contrib/internal/filter v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.99.0 // indirect
+	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/prometheus v0.99.0 // indirect
 	github.com/opencontainers/go-digest v1.0.0 // indirect
@@ -368,3 +369,5 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/tail
 replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/storage => ../../extension/storage
 
 replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor => ../../processor/transformprocessor
+
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../pkg/sampling
@@ -278,3 +278,5 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/prob
 replace github.com/open-telemetry/opentelemetry-collector-contrib/receiver/prometheusreceiver => ../../../receiver/prometheusreceiver
 
 replace github.com/open-telemetry/opentelemetry-collector-contrib/processor/transformprocessor => ../../../processor/transformprocessor
+
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ../../../pkg/sampling
@@ -522,6 +522,7 @@ require (
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/ottl v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/pdatautil v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/resourcetotelemetry v0.99.0 // indirect
+	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/azure v0.99.0 // indirect
 	github.com/open-telemetry/opentelemetry-collector-contrib/pkg/translator/jaeger v0.99.0 // indirect
@@ -1159,3 +1160,5 @@ replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/enco
 replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/encoding/otlpencodingextension => ./extension/encoding/otlpencodingextension
 
 replace github.com/open-telemetry/opentelemetry-collector-contrib/extension/ackextension => ./extension/ackextension
+
+replace github.com/open-telemetry/opentelemetry-collector-contrib/pkg/sampling => ./pkg/sampling
@@ -15,51 +15,122 @@
 [contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib
 <!-- end autogenerated section -->
 
-The probabilistic sampler supports two types of sampling for traces:
+The probabilistic sampler processor supports several modes of sampling
+for spans and log records.  Sampling is performed on a per-request
+basis, considering individual items statelessly.  For whole trace
+sampling, see
+[tailsamplingprocessor](../tailsamplingprocessor/README.md).
 
-1. `sampling.priority` [semantic
-convention](https://github.com/opentracing/specification/blob/master/semantic_conventions.md#span-tags-table)
-as defined by OpenTracing
-1. Trace ID hashing
+For trace spans, this sampler supports probabilistic sampling based on
+a configured sampling percentage applied to the TraceID.  In addition,
+the sampler recognizes a `sampling.priority` annotation, which can
+force the sampler to apply 0% or 100% sampling.
 
-The `sampling.priority` semantic convention takes priority over trace ID hashing. As the name
-implies, trace ID hashing samples based on hash values determined by trace IDs.  See [Hashing](#hashing) for more information.
+For log records, this sampler can be configured to use the embedded
+TraceID and follow the same logic as applied to spans.  When the
+TraceID is not defined, the sampler can be configured to apply hashing
+to a selected log record attribute.  This sampler also supports
+sampling priority.
 
-The following configuration options can be modified:
-- `hash_seed` (no default): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
-- `sampling_percentage` (default = 0): Percentage at which traces are sampled; >= 100 samples all traces
+## Consistency guarantee
 
-Examples:
+A consistent probability sampler is a Sampler that supports
+independent sampling decisions for each span or log record in a group
+(e.g. by TraceID), maintaining that traces will be complete with a
+certain minimum probability.
 
-```yaml
-processors:
-  probabilistic_sampler:
-    hash_seed: 22
-    sampling_percentage: 15.3
-```
+Consistent probability sampling requires that for any span in a given
+trace, if a Sampler with lesser sampling probability selects the span
+for sampling, then the span would also be selected by a Sampler
+configured with greater sampling probability.
+
+## Sampling randomness
+
+To achieve consistency, sampling randomness is taken from a
+deterministic aspect of the input data.  For traces pipelines, the
+source of randomness is always the TraceID.  For logs pipelines, the
+source of randomness can be the TraceID or another log record
+attribute, if configured.
+
+For log records, the `attribute_source` and `from_attribute` fields determine the
+source of randomness used for log records.  When `attribute_source` is
+set to `traceID`, the TraceID will be used.  When `attribute_source`
+is set to `record` or the TraceID field is absent, the value of
+`from_attribute` is taken as the source of randomness (if configured).
+
+## Sampling priority
+
+The `sampling.priority` semantic convention takes precedence over the
+probabilistic decision in all modes.
+
+🛑 Compatibility note: Logs and Traces have different behavior.
 
-The probabilistic sampler supports sampling logs according to their trace ID, or by a specific log record attribute.
+In traces pipelines, when the priority attribute has value 0, the
+configured probability will be modified to 0% and the item will not
+pass the sampler.  When the priority attribute is non-zero the
+configured probability will be set to 100%.  The sampling priority
+attribute is not configurable, and is called `sampling.priority`.
 
-The probabilistic sampler optionally may use a `hash_seed` to compute the hash of a log record.
-This sampler samples based on hash values determined by log records. See [Hashing](#hashing) for more information.
+In logs pipelines, when the priority attribute has value 0, the
+configured probability will be modified to 0%, and the item will not
+pass the sampler.  Otherwise, the logs sampling priority attribute is
+interpreted as a percentage, with values >= 100 equal to 100%
+sampling.  The logs sampling priority attribute is configured via
+`sampling_priority`.
+
+## Sampling algorithm
+
+### Hash seed
+
+The hash seed method uses the FNV hash function applied to either a
+Trace ID (spans, log records), or to the value of a specified
+attribute (only logs).  The hashed value, presumed to be random, is
+compared against a threshold value that corresponds with the sampling
+percentage.
+
+This mode requires configuring the `hash_seed` field.  This mode is
+enabled when the `hash_seed` field is not zero, or when log records
+are sampled with `attribute_source` set to `record`.
+
+In order for hashing to be consistent, all collectors for a given tier
+(e.g. behind the same load balancer) must have the same
+`hash_seed`. It is also possible to leverage a different `hash_seed`
+at different collector tiers to support additional sampling
+requirements.
+
+This mode uses 14 bits of sampling precision.
+
+### Error handling
+
+This processor considers it an error when the arriving data has no
+randomness.  This includes conditions where the TraceID field is
+invalid (16 zero bytes) and where the log record attribute source has
+zero bytes of information.
+
+By default, when there are errors determining sampling-related
+information from an item of telemetry, the data will be refused.  This
+behavior can be changed by setting the `fail_closed` property to
+false, in which case erroneous data will pass through the processor.
+
+## Configuration
 
 The following configuration options can be modified:
-- `hash_seed` (no default, optional): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
-- `sampling_percentage` (required): Percentage at which logs are sampled; >= 100 samples all logs, 0 rejects all logs.
-- `attribute_source` (default = traceID, optional): defines where to look for the attribute in from_attribute. The allowed values are `traceID` or `record`.
-- `from_attribute` (default = null, optional): The optional name of a log record attribute used for sampling purposes, such as a unique log record ID. The value of the attribute is only used if the trace ID is absent or if `attribute_source` is set to `record`.
-- `sampling_priority` (default = null, optional): The optional name of a log record attribute used to set a different sampling priority from the `sampling_percentage` setting. 0 means to never sample the log record, and >= 100 means to always sample the log record.
 
-## Hashing
+- `sampling_percentage` (32-bit floating point, required): Percentage at which items are sampled; >= 100 samples all items, 0 rejects all items.
+- `hash_seed` (32-bit unsigned integer, optional, default = 0): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
+- `fail_closed` (boolean, optional, default = true): Whether to reject items with sampling-related errors.
+
+### Logs-specific configuration
 
-In order for hashing to work, all collectors for a given tier (e.g. behind the same load balancer)
-must have the same `hash_seed`. It is also possible to leverage a different `hash_seed` at
-different collector tiers to support additional sampling requirements. Please refer to
-[config.go](./config.go) for the config spec.
+- `attribute_source` (string, optional, default = "traceID"): defines where to look for the attribute in from_attribute. The allowed values are `traceID` or `record`.
+- `from_attribute` (string, optional, default = ""): The name of a log record attribute used for sampling purposes, such as a unique log record ID. The value of the attribute is only used if the trace ID is absent or if `attribute_source` is set to `record`.
+- `sampling_priority` (string, optional, default = ""): The name of a log record attribute used to set a different sampling priority from the `sampling_percentage` setting. 0 means to never sample the log record, and >= 100 means to always sample the log record.
 
 Examples:
 
-Sample 15% of the logs:
+Sample 15% of log records according to trace ID using the OpenTelemetry
+specification.
+
 ```yaml
 processors:
   probabilistic_sampler:
@@ -76,7 +147,8 @@ processors:
     from_attribute: logID # value is required if the source is not traceID
 ```
 
-Sample logs according to the attribute `priority`:
+Give sampling priority to log records according to the attribute named
+`priority`:
 
 ```yaml
 processors:
@@ -85,6 +157,7 @@ processors:
     sampling_priority: priority
 ```
 
+## Detailed examples
 
-Refer to [config.yaml](./testdata/config.yaml) for detailed
-examples on using the processor.
+Refer to [config.yaml](./testdata/config.yaml) for detailed examples
+on using the processor.
@@ -35,6 +35,16 @@ type Config struct {
 	// different sampling rates, configuring different seeds avoids that.
 	HashSeed uint32 `mapstructure:"hash_seed"`
 
+	// FailClosed indicates to not sample data (the processor will
+	// fail "closed") in case of error, such as failure to parse
+	// the tracestate field or missing the randomness attribute.
+	//
+	// By default, failure cases are not sampled (the processor
+	// fails "closed").  Sampling priority-based decisions are made after
+	// FailClosed is processed, making it possible to sample
+	// despite errors using priority.
+	FailClosed bool `mapstructure:"fail_closed"`
+
 	// AttributeSource (logs only) defines where to look for the attribute in from_attribute. The allowed values are
 	// `traceID` or `record`. Default is `traceID`.
 	AttributeSource `mapstructure:"attribute_source"`

@@ -26,8 +26,8 @@ func TestLoadConfig(t *testing.T) {
 			id: component.NewIDWithName(metadata.Type, ""),
 			expected: &Config{
 				SamplingPercentage: 15.3,
-				HashSeed:           22,
 				AttributeSource:    "traceID",
+				FailClosed:         true,
 			},
 		},
 		{
@@ -38,6 +38,7 @@ func TestLoadConfig(t *testing.T) {
 				AttributeSource:    "record",
 				FromAttribute:      "foo",
 				SamplingPriority:   "bar",
+				FailClosed:         true,
 			},
 		},
 	}
@@ -63,12 +64,21 @@ func TestLoadConfig(t *testing.T) {
 }
 
 func TestLoadInvalidConfig(t *testing.T) {
-	factories, err := otelcoltest.NopFactories()
-	require.NoError(t, err)
+	for _, test := range []struct {
+		file     string
+		contains string
+	}{
+		{"invalid_negative.yaml", "negative sampling rate"},
+	} {
+		t.Run(test.file, func(t *testing.T) {
+			factories, err := otelcoltest.NopFactories()
+			require.NoError(t, err)
 
-	factory := NewFactory()
-	factories.Processors[metadata.Type] = factory
+			factory := NewFactory()
+			factories.Processors[metadata.Type] = factory
 
-	_, err = otelcoltest.LoadConfigAndValidate(filepath.Join("testdata", "invalid.yaml"), factories)
-	require.ErrorContains(t, err, "negative sampling rate: -15.30")
+			_, err = otelcoltest.LoadConfigAndValidate(filepath.Join("testdata", test.file), factories)
+			require.ErrorContains(t, err, test.contains)
+		})
+	}
 }
@@ -20,6 +20,10 @@ import (
 
 var onceMetrics sync.Once
 
+// The default precision is 4 hex digits, slightly more than the original
+// component logic's 14-bits of precision.
+const defaultPrecision = 4
+
 // NewFactory returns a new factory for the Probabilistic sampler processor.
 func NewFactory() processor.Factory {
 	onceMetrics.Do(func() {
@@ -37,6 +41,7 @@ func NewFactory() processor.Factory {
 func createDefaultConfig() component.Config {
 	return &Config{
 		AttributeSource: defaultAttributeSource,
+		FailClosed:      true,
 	}
 }