From b0b249bfa73e9602cdc35ec4761af3772dbd9096 Mon Sep 17 00:00:00 2001 From: Ekaterina Tyurina Date: Mon, 4 Feb 2019 18:06:21 +0000 Subject: [PATCH] Add flag reporting-zipkin-sample-rate --- src/python/pants/reporting/reporting.py | 5 ++++- src/python/pants/reporting/zipkin_reporter.py | 13 ++++++++----- .../reporting/test_reporting_integration.py | 17 +++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/src/python/pants/reporting/reporting.py b/src/python/pants/reporting/reporting.py index 25d7c82c16e..76f07e84e87 100644 --- a/src/python/pants/reporting/reporting.py +++ b/src/python/pants/reporting/reporting.py @@ -60,6 +60,8 @@ def register_options(cls, register): help='The 64-bit ID for a parent span that invokes Pants. ' 'zipkin-trace-id and zipkin-parent-id must both either be set or not set ' 'when run Pants command') + register('--zipkin-sample-rate', advanced=True, default=100.0, + help='Rate at which to sample Zipkin traces. Value 0.0 - 100.0.') def initialize(self, run_tracker, all_options, start_time=None): """Initialize with the given RunTracker. @@ -100,6 +102,7 @@ def initialize(self, run_tracker, all_options, start_time=None): zipkin_endpoint = self.get_options().zipkin_endpoint trace_id = self.get_options().zipkin_trace_id parent_id = self.get_options().zipkin_parent_id + sample_rate = self.get_options().zipkin_sample_rate if zipkin_endpoint is None and trace_id is not None and parent_id is not None: raise ValueError( @@ -113,7 +116,7 @@ def initialize(self, run_tracker, all_options, start_time=None): if zipkin_endpoint is not None: zipkin_reporter_settings = ZipkinReporter.Settings(log_level=Report.INFO) zipkin_reporter = ZipkinReporter( - run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id + run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id, sample_rate ) report.add_reporter('zipkin', zipkin_reporter) diff --git a/src/python/pants/reporting/zipkin_reporter.py b/src/python/pants/reporting/zipkin_reporter.py index 48deac3c892..68dd252df43 100644 --- a/src/python/pants/reporting/zipkin_reporter.py +++ b/src/python/pants/reporting/zipkin_reporter.py @@ -10,7 +10,7 @@ from py_zipkin import Encoding from py_zipkin.transport import BaseTransportHandler from py_zipkin.util import generate_random_64bit_string -from py_zipkin.zipkin import ZipkinAttrs, zipkin_span +from py_zipkin.zipkin import ZipkinAttrs, create_attrs_for_span, zipkin_span from pants.base.workunit import WorkUnitLabel from pants.reporting.reporter import Reporter @@ -42,7 +42,7 @@ class ZipkinReporter(Reporter): Reporter that implements Zipkin tracing. """ - def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id): + def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id, sample_rate): """ When trace_id and parent_id are set a Zipkin trace will be created with given trace_id and parent_id. If trace_id and parent_id are set to None, a trace_id will be randomly @@ -53,6 +53,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id): :param string endpoint: The full HTTP URL of a zipkin server to which traces should be posted. :param string trace_id: The overall 64 or 128-bit ID of the trace. May be None. :param string parent_id: The 64-bit ID for a parent span that invokes Pants. May be None. + :param float sample_rate: Rate at which to sample Zipkin traces. Value 0.0 - 100.0. """ super(ZipkinReporter, self).__init__(run_tracker, settings) # We keep track of connection between workunits and spans @@ -61,6 +62,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id): self.handler = HTTPTransportHandler(endpoint) self.trace_id = trace_id self.parent_id = parent_id + self.sample_rate = float(sample_rate) def start_workunit(self, workunit): """Implementation of Reporter callback.""" @@ -84,13 +86,14 @@ def start_workunit(self, workunit): is_sampled=True, ) else: - zipkin_attrs = None + zipkin_attrs = create_attrs_for_span( + sample_rate=self.sample_rate, # Value between 0.0 and 100.0 + ) span = zipkin_span( service_name=service_name, span_name=workunit.name, transport_handler=self.handler, - sample_rate=100.0, # Value between 0.0 and 100.0 encoding=Encoding.V1_THRIFT, zipkin_attrs=zipkin_attrs ) @@ -104,7 +107,7 @@ def start_workunit(self, workunit): # Goals and tasks save their start time at the beginning of their run. # This start time is passed to workunit, because the workunit may be created much later. span.start_timestamp = workunit.start_time - if first_span: + if first_span and span.zipkin_attrs.is_sampled: span.logging_context.start_timestamp = workunit.start_time def end_workunit(self, workunit): diff --git a/tests/python/pants_test/reporting/test_reporting_integration.py b/tests/python/pants_test/reporting/test_reporting_integration.py index 9f986f7924a..c09303aedf5 100644 --- a/tests/python/pants_test/reporting/test_reporting_integration.py +++ b/tests/python/pants_test/reporting/test_reporting_integration.py @@ -220,6 +220,23 @@ def test_zipkin_reporter_with_given_trace_id_parent_id(self): self.assertTrue(main_children) self.assertTrue(any(span['name'] == 'cloc' for span in main_children)) + def test_zipkin_reporter_with_zero_sample_rate(self): + ZipkinHandler = zipkin_handler() + with http_server(ZipkinHandler) as port: + endpoint = "http://localhost:{}".format(port) + command = [ + '--reporting-zipkin-endpoint={}'.format(endpoint), + '--reporting-zipkin-sample-rate=0.0', + 'cloc', + 'src/python/pants:version' + ] + + pants_run = self.run_pants(command) + self.assert_success(pants_run) + + num_of_traces = len(ZipkinHandler.traces) + self.assertEqual(num_of_traces, 0) + @staticmethod def find_spans_by_name(trace, name): return [span for span in trace if span['name'] == name]