Skip to content

Commit

Permalink
Add optional scatterplot to benchcomp output (#3077)
Browse files Browse the repository at this point in the history
Scatterplots should make it easier to immediately spot performance
trends (and indeed any differences) rather than having to process a
(large) number of table rows.

Uses mermaid-js to produce markdown-embedded plots that will display on
the job summary page. Scatterplots are not directly supported by
mermaid-js at this point (xycharts only do line or bar charts), so
quadrant plots are employed with various diagram items drawn in white to
make them disappear.
  • Loading branch information
tautschnig authored Mar 20, 2024
1 parent 54786ad commit ec2b8b3
Show file tree
Hide file tree
Showing 4 changed files with 138 additions and 7 deletions.
126 changes: 120 additions & 6 deletions tools/benchcomp/benchcomp/visualizers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@


import dataclasses
import enum
import json
import logging
import math
import subprocess
import sys
import textwrap
Expand Down Expand Up @@ -125,11 +127,21 @@ def __call__(self, results):



class Plot(enum.Enum):
    """Axis-scaling modes for the optional benchmark scatterplot.

    OFF disables the plot entirely; LINEAR and LOG select linearly or
    logarithmically scaled axes, respectively.
    """
    OFF = enum.auto()
    LINEAR = enum.auto()
    LOG = enum.auto()



class dump_markdown_results_table:
"""Print Markdown-formatted tables displaying benchmark results
For each metric, this visualization prints out a table of benchmarks,
showing the value of the metric for each variant.
showing the value of the metric for each variant, combined with an optional
scatterplot.
The 'out_file' key is mandatory; specify '-' to print to stdout.
Expand All @@ -145,12 +157,16 @@ class dump_markdown_results_table:
particular combinations of values for different variants, such as
regressions or performance improvements.
'scatterplot' takes the values 'off' (default), 'linear' (linearly scaled
axes), or 'log' (logarithmically scaled axes).
Sample configuration:
```
visualize:
- type: dump_markdown_results_table
out_file: "-"
scatterplot: linear
extra_columns:
runtime:
- column_name: ratio
Expand Down Expand Up @@ -187,9 +203,10 @@ class dump_markdown_results_table:
"""


def __init__(self, out_file, extra_columns=None, scatterplot=None):
    """Prepare the visualizer from its YAML configuration keys.

    out_file: output destination; '-' means stdout (wrapped by
        benchcomp.Outfile).
    extra_columns: optional mapping of metric name to extra-column
        specs whose 'text' entries are eval'd into callables.
    scatterplot: optional string 'off' (default), 'linear' or 'log';
        parsed into a Plot enum member.  Invalid values terminate the
        process (see _parse_scatterplot_config).
    """
    self.get_out_file = benchcomp.Outfile(out_file)
    self.extra_columns = self._eval_column_text(extra_columns or {})
    self.scatterplot = self._parse_scatterplot_config(scatterplot)


@staticmethod
Expand All @@ -206,12 +223,48 @@ def _eval_column_text(column_spec):
return column_spec


@staticmethod
def _parse_scatterplot_config(scatterplot_config_string):
    """Map the 'scatterplot' configuration value to a Plot member.

    None and "off" select Plot.OFF, "linear" selects Plot.LINEAR, and
    "log" selects Plot.LOG.  Any other value logs an error and exits
    the process with status 1.
    """
    for accepted, plot_mode in (
            (None, Plot.OFF),
            ("off", Plot.OFF),
            ("linear", Plot.LINEAR),
            ("log", Plot.LOG)):
        if scatterplot_config_string == accepted:
            return plot_mode
    logging.error(
        "Invalid scatterplot configuration '%s'",
        scatterplot_config_string)
    sys.exit(1)


@staticmethod
def _get_template():
return textwrap.dedent("""\
{% for metric, benchmarks in d["metrics"].items() %}
## {{ metric }}
{% if scatterplot and metric in d["scaled_metrics"] and d["scaled_variants"][metric]|length == 2 -%}
```mermaid
%%{init: { "quadrantChart": { "titlePadding": 15, "xAxisLabelPadding": 20, "yAxisLabelPadding": 20, "quadrantLabelFontSize": 0, "pointRadius": 2, "pointLabelFontSize": 2 }, "themeVariables": { "quadrant1Fill": "#FFFFFF", "quadrant2Fill": "#FFFFFF", "quadrant3Fill": "#FFFFFF", "quadrant4Fill": "#FFFFFF", "quadrant1TextFill": "#FFFFFF", "quadrant2TextFill": "#FFFFFF", "quadrant3TextFill": "#FFFFFF", "quadrant4TextFill": "#FFFFFF", "quadrantInternalBorderStrokeFill": "#FFFFFF" } } }%%
quadrantChart
title {{ metric }}
x-axis {{ d["scaled_variants"][metric][0] }}
y-axis {{ d["scaled_variants"][metric][1] }}
quadrant-1 1
quadrant-2 2
quadrant-3 3
quadrant-4 4
{%- for bench_name, bench_variants in d["scaled_metrics"][metric]["benchmarks"].items () %}
{% set v0 = bench_variants[d["scaled_variants"][metric][0]] -%}
{% set v1 = bench_variants[d["scaled_variants"][metric][1]] -%}
"{{ bench_name }}": [{{ v0|round(3) }}, {{ v1|round(3) }}]
{%- endfor %}
```
Scatterplot axis ranges are {{ d["scaled_metrics"][metric]["min_value"] }} (bottom/left) to {{ d["scaled_metrics"][metric]["max_value"] }} (top/right).
{% endif -%}
| Benchmark | {% for variant in d["variants"][metric] %} {{ variant }} |{% endfor %}
| --- |{% for variant in d["variants"][metric] %} --- |{% endfor -%}
{% for bench_name, bench_variants in benchmarks.items () %}
Expand All @@ -228,7 +281,48 @@ def _get_variant_names(results):


@staticmethod
def _organize_results_into_metrics(results):
def _compute_scaled_metric(data_for_metric, log_scaling):
    """Scale one metric's per-benchmark values into [0, 0.99] for mermaid.

    data_for_metric maps benchmark name -> {variant name -> value}.
    Returns None when any value is non-numeric (bool and str are
    explicitly rejected before the numeric check, since bool is a
    subclass of int).  Otherwise returns a dict with:
      - "benchmarks": benchmark -> {variant -> scaled value in [0, 1)}
      - "min_value"/"max_value": the raw extremes, rendered as
        "log(<value>)" strings when log_scaling is set.
    """
    min_value = math.inf
    max_value = -math.inf
    for bench, bench_result in data_for_metric.items():
        for variant, variant_result in bench_result.items():
            if isinstance(variant_result, (bool, str)):
                return None
            if not isinstance(variant_result, (int, float)):
                return None
            if variant_result < min_value:
                min_value = variant_result
            if variant_result > max_value:
                max_value = variant_result
    ret = {
        "benchmarks": {bench: {} for bench in data_for_metric.keys()},
        "min_value": "log({})".format(min_value) if log_scaling else min_value,
        "max_value": "log({})".format(max_value) if log_scaling else max_value,
    }
    # 1.0 is not a permissible value for mermaid, so make sure all scaled
    # results stay below that by using 0.99 as a hard-coded value or by
    # artificially increasing the range by 10 per cent
    if min_value == math.inf or min_value == max_value:
        # No data at all, or all values identical: place every point at a
        # fixed position rather than dividing by a zero-width range.
        for bench, bench_result in data_for_metric.items():
            ret["benchmarks"][bench] = {variant: 0.99 for variant in bench_result.keys()}
    else:
        if log_scaling:
            # NOTE(review): math.log raises ValueError for values <= 0 —
            # assumes all metric values are positive when log scaling is
            # requested; confirm against callers.
            min_value = math.log(min_value, 10)
            max_value = math.log(max_value, 10)
        value_range = max_value - min_value
        value_range = value_range * 1.1
        for bench, bench_result in data_for_metric.items():
            for variant, variant_result in bench_result.items():
                if log_scaling:
                    abs_value = math.log(variant_result, 10)
                else:
                    abs_value = variant_result
                ret["benchmarks"][bench][variant] = (abs_value - min_value) / value_range
    return ret


@staticmethod
def _organize_results_into_metrics(results, log_scaling):
ret = {metric: {} for metric in results["metrics"]}
for bench, bench_result in results["benchmarks"].items():
for variant, variant_result in bench_result["variants"].items():
Expand All @@ -246,7 +340,13 @@ def _organize_results_into_metrics(results):
ret[metric][bench] = {
variant: variant_result["metrics"][metric]
}
return ret
ret_scaled = {}
for metric, bench_result in ret.items():
scaled = dump_markdown_results_table._compute_scaled_metric(
bench_result, log_scaling)
if scaled is not None:
ret_scaled[metric] = scaled
return (ret, ret_scaled)


def _add_extra_columns(self, metrics):
Expand All @@ -271,20 +371,34 @@ def _get_variants(metrics):
return ret


@staticmethod
def _get_scaled_variants(metrics):
ret = {}
for metric, entries in metrics.items():
for bench, variants in entries["benchmarks"].items():
ret[metric] = list(variants.keys())
break
return ret


def __call__(self, results):
    """Render benchmark *results* as Markdown and write to out_file.

    Organizes raw results per metric, computes scaled values for the
    optional mermaid-js scatterplot (log-scaled when configured),
    applies user-defined extra columns, then renders the Jinja2
    template with both the raw and the scaled data.
    """
    (metrics, scaled) = self._organize_results_into_metrics(
        results, self.scatterplot == Plot.LOG)
    self._add_extra_columns(metrics)

    data = {
        "metrics": metrics,
        "variants": self._get_variants(metrics),
        "scaled_metrics": scaled,
        "scaled_variants": self._get_scaled_variants(scaled),
    }

    # NOTE(review): ("html") is a plain str, not a 1-tuple — Jinja2 will
    # iterate its characters as extensions; likely intended ("html",).
    # Harmless here because default_for_string=True covers this template.
    env = jinja2.Environment(
        loader=jinja2.BaseLoader, autoescape=jinja2.select_autoescape(
            enabled_extensions=("html"),
            default_for_string=True))
    template = env.from_string(self._get_template())
    include_scatterplot = self.scatterplot != Plot.OFF
    # [:-1] drops the template's trailing newline; print() re-adds one.
    output = template.render(d=data, scatterplot=include_scatterplot)[:-1]
    with self.get_out_file() as handle:
        print(output, file=handle)
1 change: 1 addition & 0 deletions tools/benchcomp/configs/perf-regression.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ visualize:

- type: dump_markdown_results_table
out_file: '-'
scatterplot: linear
extra_columns:

# For these two metrics, display the difference between old and new and
Expand Down
16 changes: 16 additions & 0 deletions tools/benchcomp/test/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,7 @@ def test_markdown_results_table(self):
"visualize": [{
"type": "dump_markdown_results_table",
"out_file": "-",
"scatterplot": "linear",
"extra_columns": {
"runtime": [{
"column_name": "ratio",
Expand All @@ -461,6 +462,21 @@ def test_markdown_results_table(self):
run_bc.stdout, textwrap.dedent("""
## runtime
```mermaid
%%{init: { "quadrantChart": { "titlePadding": 15, "xAxisLabelPadding": 20, "yAxisLabelPadding": 20, "quadrantLabelFontSize": 0, "pointRadius": 2, "pointLabelFontSize": 2 }, "themeVariables": { "quadrant1Fill": "#FFFFFF", "quadrant2Fill": "#FFFFFF", "quadrant3Fill": "#FFFFFF", "quadrant4Fill": "#FFFFFF", "quadrant1TextFill": "#FFFFFF", "quadrant2TextFill": "#FFFFFF", "quadrant3TextFill": "#FFFFFF", "quadrant4TextFill": "#FFFFFF", "quadrantInternalBorderStrokeFill": "#FFFFFF" } } }%%
quadrantChart
title runtime
x-axis variant_1
y-axis variant_2
quadrant-1 1
quadrant-2 2
quadrant-3 3
quadrant-4 4
"bench_1": [0.0, 0.909]
"bench_2": [0.909, 0.0]
```
Scatterplot axis ranges are 5 (bottom/left) to 10 (top/right).
| Benchmark | variant_1 | variant_2 | ratio |
| --- | --- | --- | --- |
| bench_1 | 5 | 10 | **2.0** |
Expand Down

0 comments on commit ec2b8b3

Please sign in to comment.