Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[statsd] Add origin detection with container ID field #720

Merged
merged 4 commits into from
Mar 8, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 93 additions & 2 deletions datadog/dogstatsd/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
DistributedContextManagerDecorator,
)
from datadog.dogstatsd.route import get_default_route
from datadog.dogstatsd.container import ContainerID
from datadog.util.compat import is_p3k, text
from datadog.util.format import normalize_tags
from datadog.version import __version__
Expand All @@ -43,6 +44,12 @@
# Tag name of entity_id
ENTITY_ID_TAG_NAME = "dd.internal.entity_id"

# Env var name of entity_id
ENTITY_ID_ENV_VAR = "DD_ENTITY_ID"

# Env var to enable/disable sending the container ID field
ORIGIN_DETECTION_ENABLED = "DD_ORIGIN_DETECTION_ENABLED"

# Default buffer settings based on socket type
UDP_OPTIMAL_PAYLOAD_LENGTH = 1432
UDS_OPTIMAL_PAYLOAD_LENGTH = 8192
Expand All @@ -52,7 +59,7 @@

# Mapping of each "DD_" prefixed environment variable to a specific tag name
DD_ENV_TAGS_MAPPING = {
"DD_ENTITY_ID": ENTITY_ID_TAG_NAME,
ENTITY_ID_ENV_VAR: ENTITY_ID_TAG_NAME,
sgnn7 marked this conversation as resolved.
Show resolved Hide resolved
"DD_ENV": "env",
"DD_SERVICE": "service",
"DD_VERSION": "version",
Expand Down Expand Up @@ -101,6 +108,8 @@ def __init__(
telemetry_port=None, # type: Union[str, int]
telemetry_socket_path=None, # type: Text
max_buffer_len=0, # type: int
container_id=None, # type: Optional[Text]
origin_detection_enabled=True, # type: bool
): # type: (...) -> None
"""
Initialize a DogStatsd object.
Expand Down Expand Up @@ -144,6 +153,10 @@ def __init__(
telemetry stats to. If set, it overrides default value.
:type DD_TELEMETRY_PORT: integer

:envvar DD_ORIGIN_DETECTION_ENABLED: Enable/disable sending the container ID field
for origin detection.
:type DD_ORIGIN_DETECTION_ENABLED: boolean

:param host: the host of the DogStatsd server.
:type host: string

Expand Down Expand Up @@ -206,6 +219,27 @@ def __init__(
:param telemetry_socket_path: Submit client telemetry to dogstatsd through a UNIX
socket instead of UDP. If set, disables UDP transmission (Linux only)
:type telemetry_socket_path: string

:param container_id: Allows passing the container ID, this will be used by the Agent to enrich
metrics with container tags.
This feature requires Datadog Agent version >=6.35.0 && <7.0.0 or Agent versions >=7.35.0.
When configured, the provided container ID is prioritized over the container ID discovered
via Origin Detection.
The client prioritizes the value passed via DD_ENTITY_ID (if set) over the container ID.
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
sgnn7 marked this conversation as resolved.
Show resolved Hide resolved
Default: None.
:type container_id: string

:param origin_detection_enabled: Enable/disable the client origin detection.
This feature requires Datadog Agent version >=6.35.0 && <7.0.0 or Agent versions >=7.35.0.
When enabled, the client tries to discover its container ID and sends it to the Agent
to enrich the metrics with container tags.
Origin detection can be disabled by configuring the environment variabe DD_ORIGIN_DETECTION_ENABLED=false
The client tries to read the container ID by parsing the file /proc/self/cgroup.
This is not supported on Windows.
The client prioritizes the value passed via DD_ENTITY_ID (if set) over the container ID.
Default: True.
More on this: https://docs.datadoghq.com/developers/dogstatsd/?tab=kubernetes#origin-detection-over-udp
:type origin_detection_enabled: boolean
"""

self._socket_lock = Lock()
Expand Down Expand Up @@ -274,10 +308,13 @@ def __init__(
# Options
env_tags = [tag for tag in os.environ.get("DATADOG_TAGS", "").split(",") if tag]
# Inject values of DD_* environment variables as global tags.
has_entity_id = False
for var, tag_name in DD_ENV_TAGS_MAPPING.items():
value = os.environ.get(var, "")
if value:
env_tags.append("{name}:{value}".format(name=tag_name, value=value))
if value == ENTITY_ID_ENV_VAR:
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
has_entity_id = True
if constant_tags is None:
constant_tags = []
self.constant_tags = constant_tags + env_tags
Expand All @@ -287,6 +324,14 @@ def __init__(
self.use_ms = use_ms
self.default_sample_rate = default_sample_rate

# Origin detection
self._container_id_field = None
if not has_entity_id:
origin_detection_enabled = self._is_origin_detection_enabled(
container_id, origin_detection_enabled, has_entity_id
)
self._set_container_id_field(container_id, origin_detection_enabled)

# init telemetry version
self._client_tags = [
"client:py",
Expand Down Expand Up @@ -717,13 +762,14 @@ def close_socket(self):

def _serialize_metric(self, metric, metric_type, value, tags, sample_rate=1):
# Create/format the metric packet
return "%s%s:%s|%s%s%s" % (
return "%s%s:%s|%s%s%s%s" % (
(self.namespace + ".") if self.namespace else "",
metric,
value,
metric_type,
("|@" + text(sample_rate)) if sample_rate != 1 else "",
("|#" + ",".join(normalize_tags(tags))) if tags else "",
self._get_container_id_field(),
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
)

def _report(self, metric, metric_type, value, tags, sample_rate):
Expand Down Expand Up @@ -928,6 +974,7 @@ def event(
string = "%s|t:%s" % (string, alert_type)
if tags:
string = "%s|#%s" % (string, ",".join(tags))
string = "%s%s" % (string, self._get_container_id_field())
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved

if len(string) > 8 * 1024:
raise Exception(
Expand Down Expand Up @@ -970,6 +1017,7 @@ def service_check(
string = u"{0}|#{1}".format(string, ",".join(tags))
if message:
string = u"{0}|m:{1}".format(string, message)
string = u"{0}{1}".format(string, self._get_container_id_field())
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved

if self._telemetry:
self.service_checks_count += 1
Expand All @@ -984,5 +1032,48 @@ def _add_constant_tags(self, tags):
return self.constant_tags
return tags

def _is_origin_detection_enabled(self, container_id, origin_detection_enabled, has_entity_id):
"""
Returns whether the client should fill the container field.
If DD_ENTITY_ID is set, we don't send the container ID
If a user-defined container ID is provided, we don't ignore origin detection
as dd.internal.entity_id is prioritized over the container field for backward compatibility.
If DD_ENTITY_ID is not set, we try to fill the container field automatically unless
DD_ORIGIN_DETECTION_ENABLED is explicitly set to false.
"""
if not origin_detection_enabled or has_entity_id or container_id is not None:
# origin detection is explicitly disabled
# or DD_ENTITY_ID was found
# or a user-defined container ID was provided
return False
value = os.environ.get(ORIGIN_DETECTION_ENABLED, "")
return value.lower() not in {"no", "false", "0", "n", "off"}
sgnn7 marked this conversation as resolved.
Show resolved Hide resolved
sgnn7 marked this conversation as resolved.
Show resolved Hide resolved

def _set_container_id_field(self, container_id, origin_detection_enabled):
"""
Initializes the container ID.
It can either be provided by the user or read from cgroups.
"""
container_id_prefix = "|c:"
if container_id:
self._container_id_field = container_id_prefix + container_id
return
if origin_detection_enabled:
try:
reader = ContainerID()
self._container_id_field = container_id_prefix + reader.get_container_id()
except Exception as e:
log.debug("Couldn't get container ID: %s", str(e))
self._container_id_field = None

def _get_container_id_field(self):
"""
Returns the container ID field prefixed and ready to be appended to the datagram.
Returns an empty string if the container ID was not set.
"""
if self._container_id_field:
return self._container_id_field
return ""


statsd = DogStatsd()
63 changes: 63 additions & 0 deletions datadog/dogstatsd/container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Unless explicitly stated otherwise all files in this repository are licensed under
# the BSD-3-Clause License. This product includes software developed at Datadog
# (https://www.datadoghq.com/).
# Copyright 2015-Present Datadog, Inc

import errno
import re


class UnresolvableContainerID(Exception):
"""
Unable to get container ID from cgroup.
"""


class ContainerID(object):
"""
A reader class that retrieves the current container ID parsed from a the cgroup file.

Returns:
object: ContainerID

Raises:
`NotImplementedError`: No proc filesystem is found (non-Linux systems)
`UnresolvableContainerID`: Unable to read the container ID
"""

CGROUP_PATH = "/proc/self/cgroup"
UUID_SOURCE = r"[0-9a-f]{8}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{4}[-_][0-9a-f]{12}"
CONTAINER_SOURCE = r"[0-9a-f]{64}"
TASK_SOURCE = r"[0-9a-f]{32}-\d+"
LINE_RE = re.compile(r"^(\d+):([^:]*):(.+)$")
CONTAINER_RE = re.compile(r"(?:.+)?({0}|{1}|{2})(?:\.scope)?$".format(UUID_SOURCE, CONTAINER_SOURCE, TASK_SOURCE))

def __init__(self):
self._container_id = self._read_container_id(self.CGROUP_PATH)

def _read_container_id(self, fpath):
try:
with open(fpath, mode="r") as fp:
for line in fp:
line = line.strip()
match = self.LINE_RE.match(line)
if not match:
continue
_, _, path = match.groups()
parts = [p for p in path.split("/")]
if len(parts):
match = self.CONTAINER_RE.match(parts.pop())
if match:
return match.group(1)
except IOError as e:
if e.errno != errno.ENOENT:
raise NotImplementedError("Unable to open {}.".format(self.CGROUP_PATH))
except Exception:
raise UnresolvableContainerID("Unable to read the container ID.")
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
return None

def get_container_id(self):
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
ahmed-mez marked this conversation as resolved.
Show resolved Hide resolved
"""
Returns the container ID if found, None otherwise.
"""
return self._container_id
136 changes: 136 additions & 0 deletions tests/unit/dogstatsd/test_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# -*- coding: utf-8 -*-

# Unless explicitly stated otherwise all files in this repository are licensed under the BSD-3-Clause License.
# This product includes software developed at Datadog (https://www.datadoghq.com/).
# Copyright 2015-Present Datadog, Inc
"""
Tests for container.py
"""

import mock
import pytest

from datadog.dogstatsd.container import ContainerID


def get_mock_open(read_data=None):
mock_open = mock.mock_open(read_data=read_data)
return mock.patch("datadog.dogstatsd.container.open", mock_open)


@pytest.mark.parametrize(
"file_contents,expected_container_id",
(
# Docker file
(
"""
13:name=systemd:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
12:pids:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
11:hugetlb:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
10:net_prio:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
9:perf_event:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
8:net_cls:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
7:freezer:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
6:devices:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
5:memory:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
4:blkio:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
3:cpuacct:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
2:cpu:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
1:cpuset:/docker/3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860
""",
"3726184226f5d3147c25fdeab5b60097e378e8a720503a5e19ecfdf29f869860",
),
# k8s file
(
"""
11:perf_event:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
10:pids:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
9:memory:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
8:cpu,cpuacct:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
7:blkio:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
6:cpuset:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
5:devices:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
4:freezer:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
3:net_cls,net_prio:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
2:hugetlb:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
1:name=systemd:/kubepods/test/pod3d274242-8ee0-11e9-a8a6-1e68d864ef1a/3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1
""",
"3e74d3fd9db4c9dd921ae05c2502fb984d0cde1b36e581b13f79c639da4518a1",
),
# ECS file
(
"""
9:perf_event:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
8:memory:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
7:hugetlb:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
6:freezer:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
5:devices:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
4:cpuset:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
3:cpuacct:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
2:cpu:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
1:blkio:/ecs/test-ecs-classic/5a0d5ceddf6c44c1928d367a815d890f/38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce
""",
"38fac3e99302b3622be089dd41e7ccf38aff368a86cc339972075136ee2710ce",
),
# Fargate file
(
"""
11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
10:pids:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
9:cpuset:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
8:net_cls,net_prio:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
7:cpu,cpuacct:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
6:perf_event:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
5:freezer:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
4:devices:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
3:blkio:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
2:memory:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
1:name=systemd:/ecs/55091c13-b8cf-4801-b527-f4601742204d/432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da
""",
"432624d2150b349fe35ba397284dea788c2bf66b885d14dfc1569b01890ca7da",
),
# Fargate file >= 1.4.0
(
"""
11:hugetlb:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
10:pids:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
9:cpuset:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
8:net_cls,net_prio:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
7:cpu,cpuacct:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
6:perf_event:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
5:freezer:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
4:devices:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
3:blkio:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
2:memory:/ecs/55091c13-b8cf-4801-b527-f4601742204d/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
1:name=systemd:/ecs/34dc0b5e626f2c5c4c5170e34b10e765-1234567890
""",
"34dc0b5e626f2c5c4c5170e34b10e765-1234567890",
),
# Linux non-containerized file
(
"""
11:blkio:/user.slice/user-0.slice/session-14.scope
10:memory:/user.slice/user-0.slice/session-14.scope
9:hugetlb:/
8:cpuset:/
7:pids:/user.slice/user-0.slice/session-14.scope
6:freezer:/
5:net_cls,net_prio:/
4:perf_event:/
3:cpu,cpuacct:/user.slice/user-0.slice/session-14.scope
2:devices:/user.slice/user-0.slice/session-14.scope
1:name=systemd:/user.slice/user-0.slice/session-14.scope
""",
None,
),
),
)
def test_get_container_id(file_contents, expected_container_id):
with get_mock_open(read_data=file_contents) as mock_open:
if file_contents is None:
mock_open.side_effect = IOError

reader = ContainerID()
assert expected_container_id == reader.get_container_id()

mock_open.assert_called_once_with("/proc/self/cgroup", mode="r")
Loading