From 54bb85c96f7229fc6eaa94194d8b921f65709d61 Mon Sep 17 00:00:00 2001 From: Connor Adams Date: Tue, 30 Jun 2020 09:40:18 -0400 Subject: [PATCH 1/2] xclude URLs exclude list --- ext/opentelemetry-ext-django/CHANGELOG.md | 2 ++ ext/opentelemetry-ext-django/README.rst | 12 ++++--- .../opentelemetry/ext/django/middleware.py | 31 ++++++------------- .../tests/test_middleware.py | 31 +++++++++++++++++-- ext/opentelemetry-ext-django/tests/views.py | 6 +++- ext/opentelemetry-ext-flask/CHANGELOG.md | 2 ++ ext/opentelemetry-ext-flask/README.rst | 11 ++++--- .../src/opentelemetry/ext/flask/__init__.py | 30 ++++++------------ .../tests/test_programmatic.py | 24 ++++++++++++++ ext/opentelemetry-ext-pyramid/CHANGELOG.md | 2 ++ ext/opentelemetry-ext-pyramid/README.rst | 11 +++++++ .../opentelemetry/ext/pyramid/callbacks.py | 24 +++++--------- .../tests/test_programmatic.py | 9 ++---- .../src/opentelemetry/util/__init__.py | 31 +++++-------------- 14 files changed, 127 insertions(+), 99 deletions(-) diff --git a/ext/opentelemetry-ext-django/CHANGELOG.md b/ext/opentelemetry-ext-django/CHANGELOG.md index 9c578d962f..abbf90451b 100644 --- a/ext/opentelemetry-ext-django/CHANGELOG.md +++ b/ext/opentelemetry-ext-django/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Use one general exclude list instead of two ([#872](https://github.com/open-telemetry/opentelemetry-python/pull/872)) + ## 0.8b0 Released 2020-05-27 diff --git a/ext/opentelemetry-ext-django/README.rst b/ext/opentelemetry-ext-django/README.rst index 834bd63249..1759cb6602 100644 --- a/ext/opentelemetry-ext-django/README.rst +++ b/ext/opentelemetry-ext-django/README.rst @@ -20,11 +20,15 @@ Configuration Exclude lists ************* -Excludes certain hosts and paths from being tracked. Pass in comma delimited string into environment variables. -Host refers to the entire url and path refers to the part of the url after the domain. Host matches the exact string that is given, where as path matches if the url starts with the given excluded path. +To exclude certain URLs from being tracked, set the environment variable ``OPENTELEMETRY_PYTHON_DJANGO_EXCLUDED_URLS`` with comma delimited regexes representing which URLs to exclude. -Excluded hosts: OPENTELEMETRY_PYTHON_DJANGO_EXCLUDED_HOSTS -Excluded paths: OPENTELEMETRY_PYTHON_DJANGO_EXCLUDED_PATHS +For example, + +:: + + export OPENTELEMETRY_PYTHON_DJANGO_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. References ---------- diff --git a/ext/opentelemetry-ext-django/src/opentelemetry/ext/django/middleware.py b/ext/opentelemetry-ext-django/src/opentelemetry/ext/django/middleware.py index 0ce03b97cb..c0cb244756 100644 --- a/ext/opentelemetry-ext-django/src/opentelemetry/ext/django/middleware.py +++ b/ext/opentelemetry-ext-django/src/opentelemetry/ext/django/middleware.py @@ -24,7 +24,7 @@ ) from opentelemetry.propagators import extract from opentelemetry.trace import SpanKind, get_tracer -from opentelemetry.util import disable_trace +from opentelemetry.util import ExcludeList try: from django.utils.deprecation import MiddlewareMixin @@ -44,12 +44,11 @@ class _DjangoMiddleware(MiddlewareMixin): _environ_token = "opentelemetry-instrumentor-django.token" _environ_span_key = "opentelemetry-instrumentor-django.span_key" - _excluded_hosts = Configuration().DJANGO_EXCLUDED_HOSTS or [] - _excluded_paths = Configuration().DJANGO_EXCLUDED_PATHS or [] - if _excluded_hosts: - _excluded_hosts = str.split(_excluded_hosts, ",") - if _excluded_paths: - _excluded_paths = str.split(_excluded_paths, ",") + _excluded_urls = Configuration().DJANGO_EXCLUDED_URLS or [] + if _excluded_urls: + _excluded_urls = ExcludeList(str.split(_excluded_urls, ",")) + else: + _excluded_urls = ExcludeList(_excluded_urls) def process_view( self, request, view_func, view_args, view_kwargs @@ -62,11 +61,7 @@ def process_view( # key.lower().replace('_', '-').replace("http-", "", 1): value # for key, value in request.META.items() # } - if disable_trace( - request.build_absolute_uri("?"), - self._excluded_hosts, - self._excluded_paths, - ): + if self._excluded_urls.url_disabled(request.build_absolute_uri("?")): return environ = request.META @@ -97,11 +92,7 @@ def process_exception(self, request, exception): # Django can call this method and process_response later. In order # to avoid __exit__ and detach from being called twice then, the # respective keys are being removed here. - if disable_trace( - request.build_absolute_uri("?"), - self._excluded_hosts, - self._excluded_paths, - ): + if self._excluded_urls.url_disabled(request.build_absolute_uri("?")): return if self._environ_activation_key in request.META.keys(): @@ -116,11 +107,7 @@ def process_exception(self, request, exception): request.META.pop(self._environ_token, None) def process_response(self, request, response): - if disable_trace( - request.build_absolute_uri("?"), - self._excluded_hosts, - self._excluded_paths, - ): + if self._excluded_urls.url_disabled(request.build_absolute_uri("?")): return response if ( diff --git a/ext/opentelemetry-ext-django/tests/test_middleware.py b/ext/opentelemetry-ext-django/tests/test_middleware.py index e0ac92de52..e5c3e10d03 100644 --- a/ext/opentelemetry-ext-django/tests/test_middleware.py +++ b/ext/opentelemetry-ext-django/tests/test_middleware.py @@ -13,6 +13,7 @@ # limitations under the License. from sys import modules +from unittest.mock import patch from django.conf import settings from django.conf.urls import url @@ -24,15 +25,17 @@ from opentelemetry.test.wsgitestutil import WsgiTestBase from opentelemetry.trace import SpanKind from opentelemetry.trace.status import StatusCanonicalCode +from opentelemetry.util import ExcludeList # pylint: disable=import-error -from .views import error, excluded, excluded2, traced +from .views import error, excluded, excluded_noarg, excluded_noarg2, traced urlpatterns = [ url(r"^traced/", traced), url(r"^error/", error), - url(r"^excluded/", excluded), - url(r"^excluded2/", excluded2), + url(r"^excluded_arg/", excluded), + url(r"^excluded_noarg/", excluded_noarg), + url(r"^excluded_noarg2/", excluded_noarg2), ] _django_instrumentor = DjangoInstrumentor() @@ -111,3 +114,25 @@ def test_error(self): span.attributes["http.url"], "http://testserver/error/" ) self.assertEqual(span.attributes["http.scheme"], "http") + + @patch( + "opentelemetry.ext.django.middleware._DjangoMiddleware._excluded_urls", + ExcludeList(["http://testserver/excluded_arg/123", "excluded_noarg"]), + ) + def test_exclude_lists(self): + client = Client() + client.get("/excluded_arg/123") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 0) + + client.get("/excluded_arg/125") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 1) + + client.get("/excluded_noarg/") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 1) + + client.get("/excluded_noarg2/") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 1) diff --git a/ext/opentelemetry-ext-django/tests/views.py b/ext/opentelemetry-ext-django/tests/views.py index 9035f0ea03..b5a2930404 100644 --- a/ext/opentelemetry-ext-django/tests/views.py +++ b/ext/opentelemetry-ext-django/tests/views.py @@ -13,5 +13,9 @@ def excluded(request): # pylint: disable=unused-argument return HttpResponse() -def excluded2(request): # pylint: disable=unused-argument +def excluded_noarg(request): # pylint: disable=unused-argument + return HttpResponse() + + +def excluded_noarg2(request): # pylint: disable=unused-argument return HttpResponse() diff --git a/ext/opentelemetry-ext-flask/CHANGELOG.md b/ext/opentelemetry-ext-flask/CHANGELOG.md index d5b1635919..1db23d4e80 100644 --- a/ext/opentelemetry-ext-flask/CHANGELOG.md +++ b/ext/opentelemetry-ext-flask/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Use one general exclude list instead of two ([#872](https://github.com/open-telemetry/opentelemetry-python/pull/872)) + ## 0.7b1 Released 2020-05-12 diff --git a/ext/opentelemetry-ext-flask/README.rst b/ext/opentelemetry-ext-flask/README.rst index 0a4c894077..395053972e 100644 --- a/ext/opentelemetry-ext-flask/README.rst +++ b/ext/opentelemetry-ext-flask/README.rst @@ -21,12 +21,15 @@ Configuration Exclude lists ************* -Excludes certain hosts and paths from being tracked. Pass in comma delimited string into environment variables. -Host refers to the entire url and path refers to the part of the url after the domain. Host matches the exact string that is given, where as path matches if the url starts with the given excluded path. +To exclude certain URLs from being tracked, set the environment variable ``OPENTELEMETRY_PYTHON_FLASK_EXCLUDED_URLS`` with comma delimited regexes representing which URLs to exclude. -Excluded hosts: OPENTELEMETRY_PYTHON_FLASK_EXCLUDED_HOSTS -Excluded paths: OPENTELEMETRY_PYTHON_FLASK_EXCLUDED_PATHS +For example, +:: + + export OPENTELEMETRY_PYTHON_FLASK_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. References ---------- diff --git a/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py b/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py index 20d6501a93..464958a7e9 100644 --- a/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py +++ b/ext/opentelemetry-ext-flask/src/opentelemetry/ext/flask/__init__.py @@ -55,7 +55,7 @@ def hello(): from opentelemetry import configuration, context, propagators, trace from opentelemetry.ext.flask.version import __version__ from opentelemetry.instrumentation.instrumentor import BaseInstrumentor -from opentelemetry.util import disable_trace, time_ns +from opentelemetry.util import ExcludeList, time_ns _logger = getLogger(__name__) @@ -65,22 +65,14 @@ def hello(): _ENVIRON_TOKEN = "opentelemetry-flask.token" -def get_excluded_hosts(): - hosts = configuration.Configuration().FLASK_EXCLUDED_HOSTS or [] - if hosts: - hosts = str.split(hosts, ",") - return hosts +def get_excluded_urls(): + urls = configuration.Configuration().FLASK_EXCLUDED_URLS or [] + if urls: + urls = str.split(urls, ",") + return ExcludeList(urls) -def get_excluded_paths(): - paths = configuration.Configuration().FLASK_EXCLUDED_PATHS or [] - if paths: - paths = str.split(paths, ",") - return paths - - -_excluded_hosts = get_excluded_hosts() -_excluded_paths = get_excluded_paths() +_excluded_urls = get_excluded_urls() def _rewrapped_app(wsgi_app): @@ -92,9 +84,7 @@ def _wrapped_app(environ, start_response): environ[_ENVIRON_STARTTIME_KEY] = time_ns() def _start_response(status, response_headers, *args, **kwargs): - if not disable_trace( - flask.request.url, _excluded_hosts, _excluded_paths - ): + if not _excluded_urls.url_disabled(flask.request.url): span = flask.request.environ.get(_ENVIRON_SPAN_KEY) if span: @@ -116,7 +106,7 @@ def _start_response(status, response_headers, *args, **kwargs): def _before_request(): - if disable_trace(flask.request.url, _excluded_hosts, _excluded_paths): + if _excluded_urls.url_disabled(flask.request.url): return environ = flask.request.environ @@ -148,7 +138,7 @@ def _before_request(): def _teardown_request(exc): - if disable_trace(flask.request.url, _excluded_hosts, _excluded_paths): + if _excluded_urls.url_disabled(flask.request.url): return activation = flask.request.environ.get(_ENVIRON_ACTIVATION_KEY) diff --git a/ext/opentelemetry-ext-flask/tests/test_programmatic.py b/ext/opentelemetry-ext-flask/tests/test_programmatic.py index 30fa9c4eb2..3a15979cb4 100644 --- a/ext/opentelemetry-ext-flask/tests/test_programmatic.py +++ b/ext/opentelemetry-ext-flask/tests/test_programmatic.py @@ -12,12 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +from unittest.mock import patch + from flask import Flask, request from opentelemetry import trace from opentelemetry.ext.flask import FlaskInstrumentor from opentelemetry.test.test_base import TestBase from opentelemetry.test.wsgitestutil import WsgiTestBase +from opentelemetry.util import ExcludeList # pylint: disable=import-error from .base_test import InstrumentationTest @@ -139,3 +142,24 @@ def test_internal_error(self): self.assertEqual(span_list[0].name, "_hello_endpoint") self.assertEqual(span_list[0].kind, trace.SpanKind.SERVER) self.assertEqual(span_list[0].attributes, expected_attrs) + + @patch( + "opentelemetry.ext.flask._excluded_urls", + ExcludeList(["http://localhost/excluded_arg/123", "excluded_noarg"]), + ) + def test_exclude_lists(self): + self.client.get("/excluded_arg/123") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 0) + + self.client.get("/excluded_arg/125") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 1) + + self.client.get("/excluded_noarg") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 1) + + self.client.get("/excluded_noarg2") + span_list = self.memory_exporter.get_finished_spans() + self.assertEqual(len(span_list), 1) diff --git a/ext/opentelemetry-ext-pyramid/CHANGELOG.md b/ext/opentelemetry-ext-pyramid/CHANGELOG.md index efb1a08bbc..586f8bb5ce 100644 --- a/ext/opentelemetry-ext-pyramid/CHANGELOG.md +++ b/ext/opentelemetry-ext-pyramid/CHANGELOG.md @@ -2,6 +2,8 @@ ## Unreleased +- Use one general exclude list instead of two ([#872](https://github.com/open-telemetry/opentelemetry-python/pull/872)) + ## 0.9b0 Released 2020-06-10 diff --git a/ext/opentelemetry-ext-pyramid/README.rst b/ext/opentelemetry-ext-pyramid/README.rst index c7890ad095..319d6bff4c 100644 --- a/ext/opentelemetry-ext-pyramid/README.rst +++ b/ext/opentelemetry-ext-pyramid/README.rst @@ -13,6 +13,17 @@ Installation pip install opentelemetry-ext-pyramid +Exclude lists +************* +To exclude certain URLs from being tracked, set the environment variable ``OPENTELEMETRY_PYTHON_PYRAMID_EXCLUDED_URLS`` with comma delimited regexes representing which URLs to exclude. + +For example, + +:: + + export OPENTELEMETRY_PYTHON_PYRAMID_EXCLUDED_URLS="client/.*/info,healthcheck" + +will exclude requests such as ``https://site/client/123/info`` and ``https://site/xyz/healthcheck``. References ---------- diff --git a/ext/opentelemetry-ext-pyramid/src/opentelemetry/ext/pyramid/callbacks.py b/ext/opentelemetry-ext-pyramid/src/opentelemetry/ext/pyramid/callbacks.py index e19e447bee..4d3b81fdab 100644 --- a/ext/opentelemetry-ext-pyramid/src/opentelemetry/ext/pyramid/callbacks.py +++ b/ext/opentelemetry-ext-pyramid/src/opentelemetry/ext/pyramid/callbacks.py @@ -8,7 +8,7 @@ import opentelemetry.ext.wsgi as otel_wsgi from opentelemetry import configuration, context, propagators, trace from opentelemetry.ext.pyramid.version import __version__ -from opentelemetry.util import disable_trace, time_ns +from opentelemetry.util import ExcludeList, time_ns TWEEN_NAME = "opentelemetry.ext.pyramid.trace_tween_factory" SETTING_TRACE_ENABLED = "opentelemetry-pyramid.trace_enabled" @@ -22,22 +22,14 @@ _logger = getLogger(__name__) -def get_excluded_hosts(): - hosts = configuration.Configuration().PYRAMID_EXCLUDED_HOSTS or [] - if hosts: - hosts = str.split(hosts, ",") - return hosts +def get_excluded_urls(): + urls = configuration.Configuration().PYRAMID_EXCLUDED_URLS or [] + if urls: + urls = str.split(urls, ",") + return ExcludeList(urls) -def get_excluded_paths(): - paths = configuration.Configuration().PYRAMID_EXCLUDED_PATHS or [] - if paths: - paths = str.split(paths, ",") - return paths - - -_excluded_hosts = get_excluded_hosts() -_excluded_paths = get_excluded_paths() +_excluded_urls = get_excluded_urls() def includeme(config): @@ -119,7 +111,7 @@ def disabled_tween(request): # make a request tracing function def trace_tween(request): - if disable_trace(request.url, _excluded_hosts, _excluded_paths): + if _excluded_urls.url_disabled(request.url): request.environ[_ENVIRON_ENABLED_KEY] = False # short-circuit when we don't want to trace anything return handler(request) diff --git a/ext/opentelemetry-ext-pyramid/tests/test_programmatic.py b/ext/opentelemetry-ext-pyramid/tests/test_programmatic.py index e8937e8f5e..d086fc37eb 100644 --- a/ext/opentelemetry-ext-pyramid/tests/test_programmatic.py +++ b/ext/opentelemetry-ext-pyramid/tests/test_programmatic.py @@ -20,6 +20,7 @@ from opentelemetry.ext.pyramid import PyramidInstrumentor from opentelemetry.test.test_base import TestBase from opentelemetry.test.wsgitestutil import WsgiTestBase +from opentelemetry.util import ExcludeList # pylint: disable=import-error from .pyramid_base_test import InstrumentationTest @@ -169,12 +170,8 @@ def test_warnings(self, mock_logger): self.assertEqual(mock_logger.warning.called, True) @patch( - "opentelemetry.ext.pyramid.callbacks._excluded_hosts", - ["http://localhost/excluded_arg/123"], - ) - @patch( - "opentelemetry.ext.pyramid.callbacks._excluded_paths", - ["excluded_noarg"], + "opentelemetry.ext.pyramid.callbacks._excluded_urls", + ExcludeList(["http://localhost/excluded_arg/123", "excluded_noarg"]), ) def test_exclude_lists(self): self.client.get("/excluded_arg/123") diff --git a/opentelemetry-api/src/opentelemetry/util/__init__.py b/opentelemetry-api/src/opentelemetry/util/__init__.py index ed1268fcb6..1e4097cb9e 100644 --- a/opentelemetry-api/src/opentelemetry/util/__init__.py +++ b/opentelemetry-api/src/opentelemetry/util/__init__.py @@ -67,28 +67,13 @@ def _load_trace_provider(provider: str) -> "TracerProvider": return cast("TracerProvider", _load_provider(provider)) -# Pattern for matching up until the first '/' after the 'https://' part. -_URL_PATTERN = r"(https?|ftp)://.*?/" +class ExcludeList: + """Class to exclude certain paths (given as a list of regexes) from tracing requests""" + def __init__(self, excluded_urls: Sequence[str]): + self._non_empty = len(excluded_urls) > 0 + if self._non_empty: + self._regex = re.compile("|".join(excluded_urls)) -def disable_tracing_path(url: str, excluded_paths: Sequence[str]) -> bool: - if excluded_paths: - # Match only the part after the first '/' that is not in _URL_PATTERN - regex = "{}({})".format(_URL_PATTERN, "|".join(excluded_paths)) - if re.match(regex, url): - return True - return False - - -def disable_tracing_hostname( - url: str, excluded_hostnames: Sequence[str] -) -> bool: - return url in excluded_hostnames - - -def disable_trace( - url: str, excluded_hosts: Sequence[str], excluded_paths: Sequence[str] -) -> bool: - return disable_tracing_hostname( - url, excluded_hosts - ) or disable_tracing_path(url, excluded_paths) + def url_disabled(self, url: str) -> bool: + return bool(self._non_empty and re.search(self._regex, url)) From cfbdb2ba641a9aeb7e378d001d220a8153d56f2d Mon Sep 17 00:00:00 2001 From: Connor Adams Date: Fri, 3 Jul 2020 16:36:57 -0400 Subject: [PATCH 2/2] regex tests --- .../tests/util/test_exclude_list.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 opentelemetry-api/tests/util/test_exclude_list.py diff --git a/opentelemetry-api/tests/util/test_exclude_list.py b/opentelemetry-api/tests/util/test_exclude_list.py new file mode 100644 index 0000000000..da51478de3 --- /dev/null +++ b/opentelemetry-api/tests/util/test_exclude_list.py @@ -0,0 +1,61 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from unittest import mock + +from opentelemetry.util import ExcludeList + + +class TestExcludeList(unittest.TestCase): + def test_basic(self): + regexes = ExcludeList(["path/123", "http://site.com/other_path"]) + self.assertTrue(regexes.url_disabled("http://site.com/path/123")) + self.assertTrue(regexes.url_disabled("http://site.com/path/123/abc")) + self.assertTrue( + regexes.url_disabled("https://site.com/path/123?arg=other") + ) + + self.assertFalse(regexes.url_disabled("https://site.com/path/abc/123")) + self.assertFalse(regexes.url_disabled("https://site.com/path")) + + self.assertTrue(regexes.url_disabled("http://site.com/other_path")) + self.assertTrue(regexes.url_disabled("http://site.com/other_path?abc")) + + self.assertFalse(regexes.url_disabled("https://site.com/other_path")) + self.assertFalse( + regexes.url_disabled("https://site.com/abc/other_path") + ) + + def test_regex(self): + regexes = ExcludeList( + [r"^https?://site\.com/path/123$", r"^http://.*\?arg=foo"] + ) + + self.assertTrue(regexes.url_disabled("http://site.com/path/123")) + self.assertTrue(regexes.url_disabled("https://site.com/path/123")) + + self.assertFalse(regexes.url_disabled("http://site.com/path/123/abc")) + self.assertFalse(regexes.url_disabled("http://site,com/path/123")) + + self.assertTrue( + regexes.url_disabled("http://site.com/path/123?arg=foo") + ) + self.assertTrue( + regexes.url_disabled("http://site.com/path/123?arg=foo,arg2=foo2") + ) + + self.assertFalse( + regexes.url_disabled("https://site.com/path/123?arg=foo") + )