From 38c7fdfce5456d8e302ee3b13f5627a4604742bb Mon Sep 17 00:00:00 2001 From: Daniel Rogers Date: Mon, 24 Oct 2022 16:15:06 -0400 Subject: [PATCH] Add support for regular expression matching and sanitizing of headers in Flask. --- CHANGELOG.md | 2 + .../README.rst | 42 -------- .../instrumentation/flask/__init__.py | 99 ++++++++++++++----- .../tests/base_test.py | 7 ++ .../tests/test_programmatic.py | 56 +++++++++-- 5 files changed, 128 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 377aeb3c6b..9f81832d41 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#1323](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1323)) - `opentelemetry-instrumentation-wsgi` Add support for regular expression matching and sanitization of HTTP headers. ([#1402](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1402)) +- `opentelemetry-instrumentation-flask` Add support for regular expression matching and sanitization of HTTP headers. + ([#1413](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/1413)) ### Fixed diff --git a/instrumentation/opentelemetry-instrumentation-flask/README.rst b/instrumentation/opentelemetry-instrumentation-flask/README.rst index a708e7937a..a65651997d 100644 --- a/instrumentation/opentelemetry-instrumentation-flask/README.rst +++ b/instrumentation/opentelemetry-instrumentation-flask/README.rst @@ -16,48 +16,6 @@ Installation pip install opentelemetry-instrumentation-flask -Configuration -------------- - -Exclude lists -************* -To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_FLASK_EXCLUDED_URLS`` -(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude. - -For example, - -:: - - export OTEL_PYTHON_FLASK_EXCLUDED_URLS="client/.*/info,healthcheck" - -will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. - -You can also pass the comma delimited regexes to the ``instrument_app`` method directly: - -.. code-block:: python - - FlaskInstrumentor().instrument_app(app, excluded_urls="client/.*/info,healthcheck") - -Request/Response hooks -********************** - -Utilize request/response hooks to execute custom logic to be performed before/after performing a request. Environ is an instance of WSGIEnvironment (flask.request.environ). -Response_headers is a list of key-value (tuples) representing the response headers returned from the response. - -.. code-block:: python - - def request_hook(span: Span, environ: WSGIEnvironment): - if span and span.is_recording(): - span.set_attribute("custom_user_attribute_from_request_hook", "some-value") - - def response_hook(span: Span, status: str, response_headers: List): - if span and span.is_recording(): - span.set_attribute("custom_user_attribute_from_response_hook", "some-value") - - FlaskInstrumentation().instrument(request_hook=request_hook, response_hook=response_hook) - -Flask Request object reference: https://flask.palletsprojects.com/en/2.0.x/api/#flask.Request - References ---------- diff --git a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py index 7b416bfdf8..6cb39c7e2c 100644 --- a/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py +++ b/instrumentation/opentelemetry-instrumentation-flask/src/opentelemetry/instrumentation/flask/__init__.py @@ -95,8 +95,9 @@ def hello(): Exclude lists ************* -To exclude certain URLs from being tracked, set the environment variable ``OTEL_PYTHON_FLASK_EXCLUDED_URLS`` -(or ``OTEL_PYTHON_EXCLUDED_URLS`` as fallback) with comma delimited regexes representing which URLs to exclude. +To exclude certain URLs from tracking, set the environment variable ``OTEL_PYTHON_FLASK_EXCLUDED_URLS`` +(or ``OTEL_PYTHON_EXCLUDED_URLS`` to cover all instrumentations) to a string of comma delimited regexes that match the +URLs. For example, @@ -106,7 +107,7 @@ def hello(): will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. -You can also pass the comma delimited regexes to the ``instrument_app`` method directly: +You can also pass comma delimited regexes directly to the ``instrument_app`` method: .. code-block:: python @@ -115,8 +116,15 @@ def hello(): Request/Response hooks ********************** -Utilize request/response hooks to execute custom logic to be performed before/after performing a request. Environ is an instance of WSGIEnvironment (flask.request.environ). -Response_headers is a list of key-value (tuples) representing the response headers returned from the response. +This instrumentation supports request and response hooks. These are functions that get called +right after a span is created for a request and right before the span is finished for the response. + +- The client request hook is called with the internal span and an instance of WSGIEnvironment (flask.request.environ) + when the method ``receive`` is called. +- The client response hook is called with the internal span, the status of the response and a list of key-value (tuples) + representing the response headers returned from the response when the method ``send`` is called. + +For example, .. code-block:: python @@ -130,58 +138,97 @@ def response_hook(span: Span, status: str, response_headers: List): FlaskInstrumentation().instrument(request_hook=request_hook, response_hook=response_hook) -Flask Request object reference: https://flask.palletsprojects.com/en/2.0.x/api/#flask.Request +Flask Request object reference: https://flask.palletsprojects.com/en/2.1.x/api/#flask.Request Capture HTTP request and response headers ***************************************** -You can configure the agent to capture predefined HTTP headers as span attributes, according to the `semantic convention `_. +You can configure the agent to capture specified HTTP headers as span attributes, according to the +`semantic convention `_. Request headers *************** -To capture predefined HTTP request headers as span attributes, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` -to a comma-separated list of HTTP header names. +To capture HTTP request headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` to a comma delimited list of HTTP header names. For example, - :: export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST="content-type,custom_request_header" -will extract ``content-type`` and ``custom_request_header`` from request headers and add them as span attributes. +will extract ``content-type`` and ``custom_request_header`` from the request headers and add them as span attributes. + +Request header names in Flask are case-insensitive and ``-`` characters are replaced by ``_``. So, giving the header +name as ``CUStom_Header`` in the environment variable will capture the header named ``custom-header``. + +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: -It is recommended that you should give the correct names of the headers to be captured in the environment variable. -Request header names in flask are case insensitive and - characters are replaced by _. So, giving header name as ``CUStom_Header`` in environment variable will be able capture header with name ``custom-header``. + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST="Accept.*,X-.*" -The name of the added span attribute will follow the format ``http.request.header.`` where ```` being the normalized HTTP header name (lowercase, with - characters replaced by _ ). -The value of the attribute will be single item list containing all the header values. +Would match all request headers that start with ``Accept`` and ``X-``. -Example of the added span attribute, +To capture all request headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST`` to ``".*"``. +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST=".*" + +The name of the added span attribute will follow the format ``http.request.header.`` where ```` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +single item list containing all the header values. + +For example: ``http.request.header.custom_request_header = [","]`` Response headers **************** -To capture predefined HTTP response headers as span attributes, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` -to a comma-separated list of HTTP header names. +To capture HTTP response headers as span attributes, set the environment variable +``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` to a comma delimited list of HTTP header names. For example, - :: export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE="content-type,custom_response_header" -will extract ``content-type`` and ``custom_response_header`` from response headers and add them as span attributes. +will extract ``content-type`` and ``custom_response_header`` from the response headers and add them as span attributes. -It is recommended that you should give the correct names of the headers to be captured in the environment variable. -Response header names captured in flask are case insensitive. So, giving header name as ``CUStomHeader`` in environment variable will be able capture header with name ``customheader``. +Response header names in Flask are case-insensitive. So, giving the header name as ``CUStom-Header`` in the environment +variable will capture the header named ``custom-header``. -The name of the added span attribute will follow the format ``http.response.header.`` where ```` being the normalized HTTP header name (lowercase, with - characters replaced by _ ). -The value of the attribute will be single item list containing all the header values. +Regular expressions may also be used to match multiple headers that correspond to the given pattern. For example: +:: -Example of the added span attribute, + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE="Content.*,X-.*" + +Would match all response headers that start with ``Content`` and ``X-``. + +To capture all response headers, set ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE`` to ``".*"``. +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE=".*" + +The name of the added span attribute will follow the format ``http.response.header.`` where ```` +is the normalized HTTP header name (lowercase, with ``-`` replaced by ``_``). The value of the attribute will be a +single item list containing all the header values. + +For example: ``http.response.header.custom_response_header = [","]`` +Sanitizing headers +****************** +In order to prevent storing sensitive data such as personally identifiable information (PII), session keys, passwords, +etc, set the environment variable ``OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS`` +to a comma delimited list of HTTP header names to be sanitized. Regexes may be used, and all header names will be +matched in a case-insensitive manner. + +For example, +:: + + export OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS=".*session.*,set-cookie" + +will replace the value of headers such as ``session-id`` and ``set-cookie`` with ``[REDACTED]`` in the span. + Note: - Environment variable names to capture http headers are still experimental, and thus are subject to change. + The environment variable names used to capture HTTP headers are still experimental, and thus are subject to change. API --- diff --git a/instrumentation/opentelemetry-instrumentation-flask/tests/base_test.py b/instrumentation/opentelemetry-instrumentation-flask/tests/base_test.py index d3ad9a282c..1da8faa7fd 100644 --- a/instrumentation/opentelemetry-instrumentation-flask/tests/base_test.py +++ b/instrumentation/opentelemetry-instrumentation-flask/tests/base_test.py @@ -42,6 +42,13 @@ def _custom_response_headers(): resp.headers[ "my-custom-header" ] = "my-custom-value-1,my-custom-header-2" + resp.headers[ + "my-custom-regex-header-1" + ] = "my-custom-regex-value-1,my-custom-regex-value-2" + resp.headers[ + "My-Custom-Regex-Header-2" + ] = "my-custom-regex-value-3,my-custom-regex-value-4" + resp.headers["my-secret-header"] = "my-secret-value" return resp def _common_initialization(self): diff --git a/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py b/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py index a64ca48d55..8c231b1d08 100644 --- a/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py +++ b/instrumentation/opentelemetry-instrumentation-flask/tests/test_programmatic.py @@ -36,7 +36,12 @@ from opentelemetry.sdk.resources import Resource from opentelemetry.semconv.trace import SpanAttributes from opentelemetry.test.wsgitestutil import WsgiTestBase -from opentelemetry.util.http import get_excluded_urls +from opentelemetry.util.http import ( + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS, + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST, + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE, + get_excluded_urls, +) # pylint: disable=import-error from .base_test import InstrumentationTest @@ -558,18 +563,18 @@ def test_mark_span_internal_in_presence_of_span_from_other_framework(self): ) +@patch.dict( + "os.environ", + { + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SANITIZE_FIELDS: ".*my-secret.*", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST: "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3,Regex-Test-Header-.*,Regex-Invalid-Test-Header-.*,.*my-secret.*", + OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE: "content-type,content-length,my-custom-header,invalid-header,my-custom-regex-header-.*,invalid-regex-header-.*,.*my-secret.*", + }, +) class TestCustomRequestResponseHeaders(InstrumentationTest, WsgiTestBase): def setUp(self): super().setUp() - self.env_patch = patch.dict( - "os.environ", - { - "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_REQUEST": "Custom-Test-Header-1,Custom-Test-Header-2,Custom-Test-Header-3", - "OTEL_INSTRUMENTATION_HTTP_CAPTURE_HEADERS_SERVER_RESPONSE": "content-type,content-length,my-custom-header,invalid-header", - }, - ) - self.env_patch.start() self.app = Flask(__name__) FlaskInstrumentor().instrument_app(self.app) @@ -577,7 +582,6 @@ def setUp(self): def tearDown(self): super().tearDown() - self.env_patch.stop() with self.disable_logging(): FlaskInstrumentor().uninstrument_app(self.app) @@ -585,6 +589,9 @@ def test_custom_request_header_added_in_server_span(self): headers = { "Custom-Test-Header-1": "Test Value 1", "Custom-Test-Header-2": "TestValue2,TestValue3", + "Regex-Test-Header-1": "Regex Test Value 1", + "regex-test-header-2": "RegexTestValue2,RegexTestValue3", + "My-Secret-Header": "My Secret Value", } resp = self.client.get("/hello/123", headers=headers) self.assertEqual(200, resp.status_code) @@ -594,6 +601,11 @@ def test_custom_request_header_added_in_server_span(self): "http.request.header.custom_test_header_2": ( "TestValue2,TestValue3", ), + "http.request.header.regex_test_header_1": ("Regex Test Value 1",), + "http.request.header.regex_test_header_2": ( + "RegexTestValue2,RegexTestValue3", + ), + "http.request.header.my_secret_header": ("[REDACTED]",), } self.assertEqual(span.kind, trace.SpanKind.SERVER) self.assertSpanHasAttributes(span, expected) @@ -604,6 +616,9 @@ def test_custom_request_header_not_added_in_internal_span(self): headers = { "Custom-Test-Header-1": "Test Value 1", "Custom-Test-Header-2": "TestValue2,TestValue3", + "Regex-Test-Header-1": "Regex Test Value 1", + "regex-test-header-2": "RegexTestValue2,RegexTestValue3", + "My-Secret-Header": "My Secret Value", } resp = self.client.get("/hello/123", headers=headers) self.assertEqual(200, resp.status_code) @@ -613,6 +628,13 @@ def test_custom_request_header_not_added_in_internal_span(self): "http.request.header.custom_test_header_2": ( "TestValue2,TestValue3", ), + "http.request.header.regex_test_header_1": ( + "Regex Test Value 1", + ), + "http.request.header.regex_test_header_2": ( + "RegexTestValue2,RegexTestValue3", + ), + "http.request.header.my_secret_header": ("[REDACTED]",), } self.assertEqual(span.kind, trace.SpanKind.INTERNAL) for key, _ in not_expected.items(): @@ -630,6 +652,13 @@ def test_custom_response_header_added_in_server_span(self): "http.response.header.my_custom_header": ( "my-custom-value-1,my-custom-header-2", ), + "http.response.header.my_custom_regex_header_1": ( + "my-custom-regex-value-1,my-custom-regex-value-2", + ), + "http.response.header.my_custom_regex_header_2": ( + "my-custom-regex-value-3,my-custom-regex-value-4", + ), + "http.response.header.my_secret_header": ("[REDACTED]",), } self.assertEqual(span.kind, trace.SpanKind.SERVER) self.assertSpanHasAttributes(span, expected) @@ -648,6 +677,13 @@ def test_custom_response_header_not_added_in_internal_span(self): "http.response.header.my_custom_header": ( "my-custom-value-1,my-custom-header-2", ), + "http.response.header.my_custom_regex_header_1": ( + "my-custom-regex-value-1,my-custom-regex-value-2", + ), + "http.response.header.my_custom_regex_header_2": ( + "my-custom-regex-value-3,my-custom-regex-value-4", + ), + "http.response.header.my_secret_header": ("[REDACTED]",), } self.assertEqual(span.kind, trace.SpanKind.INTERNAL) for key, _ in not_expected.items():