From c1c0bd7a251865e82b1e8d0292451f78f19f22a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?=
Date: Thu, 26 Sep 2024 13:44:06 +0200
Subject: [PATCH] Upload average CPU consumption of CI jobs to DataDog

---
 .github/workflows/ci.yml               | 10 ++++
 src/ci/scripts/upload-artifacts.sh     |  2 +-
 src/ci/scripts/upload-build-metrics.py | 81 ++++++++++++++++++++++++++
 3 files changed, 92 insertions(+), 1 deletion(-)
 create mode 100644 src/ci/scripts/upload-build-metrics.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8032154a7365b..b6dc27f123465 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -212,6 +212,16 @@ jobs:
         # erroring about invalid credentials instead.
         if: github.event_name == 'push' || env.DEPLOY == '1' || env.DEPLOY_ALT == '1'
 
+      - name: upload job metrics to DataDog
+        if: needs.calculate_matrix.outputs.run_type != 'pr'
+        env:
+          DATADOG_SITE: datadoghq.com
+          DATADOG_API_KEY: ${{ secrets.DATADOG_API_KEY }}
+          DD_GITHUB_JOB_NAME: ${{ matrix.name }}
+        run: |
+          npm install -g @datadog/datadog-ci@^2.x.x
+          python3 src/ci/scripts/upload-build-metrics.py build/cpu-usage.csv
+
   # This job is used to tell bors the final status of the build, as there is no practical way to detect
   # when a workflow is successful listening to webhooks only in our current bors implementation (homu).
   outcome:
diff --git a/src/ci/scripts/upload-artifacts.sh b/src/ci/scripts/upload-artifacts.sh
index 61c187fa77c01..129ede636f397 100755
--- a/src/ci/scripts/upload-artifacts.sh
+++ b/src/ci/scripts/upload-artifacts.sh
@@ -23,7 +23,7 @@ if [[ "${DEPLOY-0}" -eq "1" ]] || [[ "${DEPLOY_ALT-0}" -eq "1" ]]; then
 fi
 
 # CPU usage statistics.
-mv build/cpu-usage.csv "${upload_dir}/cpu-${CI_JOB_NAME}.csv"
+cp build/cpu-usage.csv "${upload_dir}/cpu-${CI_JOB_NAME}.csv"
 
 # Build metrics generated by x.py.
 mv "${build_dir}/metrics.json" "${upload_dir}/metrics-${CI_JOB_NAME}.json"
diff --git a/src/ci/scripts/upload-build-metrics.py b/src/ci/scripts/upload-build-metrics.py
new file mode 100644
index 0000000000000..a95e0949d700a
--- /dev/null
+++ b/src/ci/scripts/upload-build-metrics.py
@@ -0,0 +1,81 @@
+"""
+This script postprocesses data gathered during a CI run, computes certain metrics
+from them, and uploads these metrics to DataDog.
+
+This script is expected to be executed from within a GitHub Actions job.
+
+It expects the following environment variables:
+- DATADOG_SITE: path to the DataDog API endpoint
+- DATADOG_API_KEY: DataDog API token
+- DD_GITHUB_JOB_NAME: Name of the current GitHub Actions job
+
+It also expects a binary called `datadog-ci` to be present in PATH.
+It can be installed with `npm install -g @datadog/datadog-ci`.
+
+Usage:
+```bash
+$ python3 upload-build-metrics.py <path-to-CPU-usage-CSV>
+```
+
+`path-to-CPU-usage-CSV` is a path to a CSV generated by the `src/ci/cpu-usage-over-time.py` script.
+"""
+import argparse
+import csv
+import os
+import subprocess
+import sys
+from pathlib import Path
+from typing import List
+
+
+def load_cpu_usage(path: Path) -> List[float]:
+    usage = []
+    with open(path) as f:
+        reader = csv.reader(f, delimiter=',')
+        for row in reader:
+            # The log might contain incomplete rows or some Python exception
+            if len(row) == 2:
+                try:
+                    idle = float(row[1])
+                    usage.append(100.0 - idle)
+                except ValueError:
+                    pass
+    return usage
+
+
+def upload_datadog_measure(name: str, value: float):
+    """
+    Uploads a single numeric metric for the current GitHub Actions job to DataDog.
+    """
+    print(f"Metric {name}: {value:.4f}")
+
+    datadog_cmd = "datadog-ci"
+    if os.getenv("GITHUB_ACTIONS") is not None and sys.platform.lower().startswith("win"):
+        # Due to weird interaction of MSYS2 and Python, we need to use an absolute path,
+        # and also specify the ".cmd" at the end. See https://github.com/rust-lang/rust/pull/125771.
+        datadog_cmd = "C:\\npm\\prefix\\datadog-ci.cmd"
+
+    subprocess.run([
+        datadog_cmd,
+        "measure",
+        "--level", "job",
+        "--measures", f"{name}:{value}"
+    ],
+        check=False
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog="DataDog metric uploader"
+    )
+    parser.add_argument("cpu-usage-history-csv")
+    args = parser.parse_args()
+
+    build_usage_csv = vars(args)["cpu-usage-history-csv"]
+    usage_timeseries = load_cpu_usage(Path(build_usage_csv))
+    if len(usage_timeseries) > 0:
+        avg_cpu_usage = sum(usage_timeseries) / len(usage_timeseries)
+    else:
+        avg_cpu_usage = 0
+    upload_datadog_measure("avg-cpu-usage", avg_cpu_usage)