# Refactor the probabilistic sampler processor; add FailClosed configuration, prepare for OTEP 235 support (#31946)
New changelog entry:

```yaml
# Use this changelog template to create an entry for release notes.

# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. filelogreceiver)
component: probabilisticsamplerprocessor

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Adds the `FailClosed` flag to solidify current behavior when randomness source is missing.

# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists.
issues: [31918]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:

# If your change doesn't affect end users or the exported elements of any package,
# you should instead start your pull request title with [chore] or use the "Skip Changelog" label.
# Optional: The change log or logs in which this entry should be included.
# e.g. '[user]' or '[user, api]'
# Include 'user' if the change is relevant to end users.
# Include 'api' if there is a change to a library API.
# Default: '[user]'
change_logs: [user]
```
[contrib]: https://github.com/open-telemetry/opentelemetry-collector-releases/tree/main/distributions/otelcol-contrib
<!-- end autogenerated section -->
The probabilistic sampler processor supports several modes of sampling
for spans and log records. Sampling is performed on a per-request
basis, considering individual items statelessly. For whole-trace
sampling, see the
[tailsamplingprocessor](../tailsamplingprocessor/README.md).

For trace spans, this sampler supports probabilistic sampling based on
a configured sampling percentage applied to the TraceID. In addition,
the sampler recognizes a `sampling.priority` annotation, which can
force the sampler to apply 0% or 100% sampling.

For log records, this sampler can be configured to use the embedded
TraceID and follow the same logic as applied to spans. When the
TraceID is not defined, the sampler can be configured to apply hashing
to a selected log record attribute. This sampler also supports
sampling priority.
## Consistency guarantee

A consistent probability sampler is a Sampler that supports
independent sampling decisions for each span or log record in a group
(e.g. by TraceID), while maximizing the potential for completeness as
follows.

Consistent probability sampling requires that for any span in a given
trace, if a Sampler with lesser sampling probability selects the span
for sampling, then the span would also be selected by a Sampler
configured with greater sampling probability.
## Completeness property

A trace is complete when all of its members are sampled. A
"sub-trace" is complete when all of its descendants are sampled.

Ordinarily, Trace and Logging SDKs configure parent-based samplers,
which decide to sample based on the Context, because doing so leads to
completeness.

When non-root spans or logs make independent sampling decisions
instead of using the parent-based approach (e.g., using the
`TraceIDRatioBased` sampler for a non-root span), incompleteness may
result, and when spans and log records are independently sampled in a
processor, as by this component, the same potential for
incompleteness arises. The consistency guarantee helps minimize this
issue.

Consistent probability samplers can be safely used with a mixture of
probabilities and preserve sub-trace completeness, provided that child
spans and log records are sampled with probability greater than or
equal to that of the parent context.
Using 1%, 10% and 50% probabilities for example, in a consistent
probability scheme the 50% sampler must sample when the 10% sampler
does, and the 10% sampler must sample when the 1% sampler does. A
three-tier system could be configured with 1% sampling in the first
tier, 10% sampling in the second tier, and 50% sampling in the bottom
tier. In this configuration, 1% of traces will be complete, 10% of
traces will be sub-trace complete at the second tier, and 50% of
traces will be sub-trace complete at the third tier, thanks to the
consistency property.

These guidelines should be considered when deploying multiple
collectors with different sampling probabilities in a system. For
example, a collector serving frontend servers can be configured with a
smaller sampling probability than a collector serving backend servers,
without breaking sub-trace completeness.
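The frontend/backend guideline above can be sketched as two collector
configurations. The tier split and the percentage values here are
illustrative assumptions, not requirements of this component:

```yaml
# Hypothetical frontend-tier collector config: smaller probability.
processors:
  probabilistic_sampler:
    sampling_percentage: 10
---
# Hypothetical backend-tier collector config: larger probability.
# With consistent sampling, items kept by the 10% tier would also
# be kept by this 50% tier, preserving sub-trace completeness.
processors:
  probabilistic_sampler:
    sampling_percentage: 50
```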
## Sampling randomness

To achieve consistency, sampling randomness is taken from a
deterministic aspect of the input data. For traces pipelines, the
source of randomness is always the TraceID. For logs pipelines, the
source of randomness can be the TraceID or another log record
attribute, if configured.

For log records, the `attribute_source` and `from_attribute` fields
determine the source of randomness used for log records. When
`attribute_source` is set to `traceID`, the TraceID will be used.
When `attribute_source` is set to `record`, or when the TraceID field
is absent, the value of `from_attribute` is taken as the source of
randomness (if configured).
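For example, a logs pipeline can draw its randomness from a record
attribute. In this sketch the attribute name `logID` and the
percentage are illustrative:

```yaml
processors:
  probabilistic_sampler:
    sampling_percentage: 25
    attribute_source: record # use a log record attribute, not the TraceID
    from_attribute: logID    # illustrative attribute name
```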
## Sampling priority

The sampling priority mechanism is an override, which takes precedence
over the probabilistic decision in all modes.

🛑 Compatibility note: Logs and Traces have different behavior.

In traces pipelines, when the priority attribute has value 0, the
configured probability will be modified to 0% and the item will not
pass the sampler. When the priority attribute is non-zero, the
configured probability will be set to 100%. The sampling priority
attribute is not configurable, and is called `sampling.priority`.

In logs pipelines, when the priority attribute has value 0, the
configured probability will be modified to 0%, and the item will not
pass the sampler. Otherwise, the logs sampling priority attribute is
interpreted as a percentage, with values >= 100 equal to 100%
sampling. The logs sampling priority attribute is configured via
`sampling_priority`.

> **Review discussion:** Is there a reason for this mismatch?
>
> I was very surprised by this. I have preserved the inconsistency and
> I am not sure what we should do about it. We could unify the
> solutions by varying behavior according to the type of the
> attribute: if numeric, it's the priority, and if a string, it's the
> name of the numeric attribute containing the priority. Personally,
> I'd prefer to choose one for the long term. It would be just as
> valid to do it the other way, if we preferred less configuration.
>
> This might be the perfect opportunity for this change, but not
> necessarily part of this PR.
## Sampling algorithm

### Hash seed

The hash seed method uses the FNV hash function applied to either a
Trace ID (spans, log records) or to the value of a specified attribute
(only logs). The hashed value, presumed to be random, is compared
against a threshold value that corresponds with the sampling
percentage.

This mode requires configuring the `hash_seed` field. This mode is
enabled when the `hash_seed` field is not zero, or when log records
are sampled with `attribute_source` set to `record`.

In order for hashing to be consistent, all collectors for a given tier
(e.g. behind the same load balancer) must have the same
`hash_seed`. It is also possible to leverage a different `hash_seed`
at different collector tiers to support additional sampling
requirements.

This mode uses 14 bits of sampling precision.
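The two hash-seed deployment patterns above can be sketched as
follows; the seed and percentage values are illustrative assumptions:

```yaml
# Collectors within one tier share the same seed, so their
# hashing (and hence sampling decisions) are consistent:
processors:
  probabilistic_sampler:
    sampling_percentage: 10
    hash_seed: 22
---
# A different tier may use a different seed to apply an
# independent sampling decision to the surviving items:
processors:
  probabilistic_sampler:
    sampling_percentage: 50
    hash_seed: 23
```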
### Error handling

This processor considers it an error when the arriving data has no
randomness. This includes conditions where the TraceID field is
invalid (16 zero bytes) and where the log record attribute source has
zero bytes of information.

By default, when there are errors determining sampling-related
information from an item of telemetry, the data will be refused. This
behavior can be changed by setting the `fail_closed` property to
false, in which case erroneous data will pass through the processor.
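A minimal sketch of a fail-open configuration (the percentage is
illustrative), letting items without randomness pass through instead
of being refused:

```yaml
processors:
  probabilistic_sampler:
    sampling_percentage: 25
    fail_closed: false # pass items through when randomness cannot be determined
```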
## Configuration

The following configuration options can be modified:

- `sampling_percentage` (32-bit floating point, required): Percentage at which items are sampled; >= 100 samples all items, 0 rejects all items.
- `hash_seed` (32-bit unsigned integer, optional, default = 0): An integer used to compute the hash algorithm. Note that all collectors for a given tier (e.g. behind the same load balancer) should have the same hash_seed.
- `fail_closed` (boolean, optional, default = true): Whether to reject items with sampling-related errors.

> **Review discussion:** Is this a change in behavior from the current
> implementation?
>
> The current implementation has an odd inconsistency, which this is
> meant to resolve in a user-configurable way. In cases where there is
> no randomness, the sampling decision is fixed and therefore depends
> on the configured probability. For some probabilities the failure
> would be open, and for other probabilities the failure would be
> closed, depending on the hash seed. A test demonstrates that empty
> TraceIDs (16 bytes of 0s) are sampled at 56.8% and above, and logs
> with missing attribute values (0 bytes) are sampled at 82.9% and
> above. Since most users are expected to configure probabilities at
> 50% or below, most users would never see these items of telemetry
> and never knew there was a problem. This justifies the decision to
> use `FailClosed=true` by default: users would have had to be
> sampling above 56% to see these records before, with the default
> seed.

Examples:

```yaml
processors:
  probabilistic_sampler:
    hash_seed: 22
    sampling_percentage: 15.3
```
### Logs-specific configuration

The probabilistic sampler supports sampling logs according to their
trace ID, or by a specific log record attribute.

- `attribute_source` (string, optional, default = "traceID"): Defines where to look for the attribute named by `from_attribute`. The allowed values are `traceID` or `record`.
- `from_attribute` (string, optional, default = ""): The name of a log record attribute used for sampling purposes, such as a unique log record ID. The value of the attribute is only used if the trace ID is absent or if `attribute_source` is set to `record`.
- `sampling_priority` (string, optional, default = ""): The name of a log record attribute used to set a different sampling priority from the `sampling_percentage` setting. 0 means to never sample the log record, and >= 100 means to always sample the log record.
Examples:

Sample 15% of log records according to trace ID using the OpenTelemetry
specification:

```yaml
processors:
  probabilistic_sampler:
    sampling_percentage: 15
```

Sample log records by the value of a record attribute:

```yaml
processors:
  probabilistic_sampler:
    # ...
    attribute_source: record
    from_attribute: logID # value is required if the source is not traceID
```
Give sampling priority to log records according to the attribute named
`priority`:

```yaml
processors:
  probabilistic_sampler:
    # ...
    sampling_priority: priority
```
## Detailed examples

Refer to [config.yaml](./testdata/config.yaml) for detailed examples
on using the processor.
> **Review discussion:** This would also deserve some explanation:
> what's a sampler here? It wasn't defined yet. Is this about chaining
> collectors together, where the first samples 100%, and the second
> 90%, and the assumption is that all those 80% would be within the
> 90%?
>
> See revised text: 594852f.
>
> Alright. I think we might want to make it easier to digest in the
> future, but let's wait for users to provide feedback.