diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml
new file mode 100644
index 00000000..c2ba1aaf
--- /dev/null
+++ b/.github/workflows/validation.yml
@@ -0,0 +1,46 @@
+name: Data validation
+
+on:
+  pull_request:
+
+jobs:
+  validation:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Install build dependencies
+        run: |
+          sudo apt-get -y install libunwind-dev binutils-dev libiberty-dev
+
+      - name: Install Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Compile Austin
+        run: |
+          autoreconf --install
+          ./configure --enable-debug-symbols true
+          make
+
+      - name: Install runtime dependencies
+        run: |
+          python3.10 -m venv .venv
+          source .venv/bin/activate
+          pip install --upgrade pip
+          pip install -r scripts/requirements-val.txt
+          deactivate
+
+      - name: Run data validation
+        run: |
+          ulimit -c unlimited
+
+          source .venv/bin/activate
+          python scripts/validation.py --format markdown | tee comment.txt
+          deactivate
+
+      - name: Post results on PR
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          path: comment.txt
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 5f7095a6..2f71e3ae 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -7,19 +7,15 @@
 from textwrap import wrap
 import typing as t
 from argparse import ArgumentParser
-from itertools import product
 from math import floor, log
 from pathlib import Path
 
 from scipy.stats import ttest_ind
 
-sys.path.insert(0, str(Path(__file__).parent.parent))
+from common import download_release
+
+from test.utils import metadata, target
 
-import tarfile
-from io import BytesIO
-from test.utils import Variant, metadata, target
-from urllib.error import HTTPError
-from urllib.request import urlopen
 
 VERSIONS = ("3.4.1", "3.5.0", "dev")
 SCENARIOS = [
@@ -99,38 +95,6 @@ def get_stats(output: str) -> t.Optional[dict]:
     return None
 
 
-def download_release(version: str, dest: Path, variant_name: str = "austin") -> Variant:
-    if version == "dev":
-        return Variant(f"src/{variant_name}")
-
-    binary_dest = dest / version
-    binary = binary_dest / variant_name
-
-    if not binary.exists():
-        prefix = "https://github.com/p403n1x87/austin/releases/download/"
-        for flavour, v in product({"-gnu", ""}, {"", "v"}):
-            try:
-                with urlopen(
-                    f"{prefix}v{version}/{variant_name}-{v}{version}{flavour}-linux-amd64.tar.xz"
-                ) as stream:
-                    buffer = BytesIO(stream.read())
-                    binary_dest.mkdir(parents=True, exist_ok=True)
-                    tar = tarfile.open(fileobj=buffer, mode="r:xz")
-                    tar.extract(variant_name, str(binary_dest))
-            except HTTPError:
-                continue
-            break
-        else:
-            raise RuntimeError(f"Could not download Austin version {version}")
-
-    variant = Variant(str(binary))
-
-    out = variant("-V").stdout
-    assert f"{variant_name} {version}" in out, (f"{variant_name} {version}", out)
-
-    return variant
-
-
 class Outcome:
     def __init__(self, data: list[float]) -> None:
         self.data = data
diff --git a/scripts/common.py b/scripts/common.py
new file mode 100644
index 00000000..e2ecff34
--- /dev/null
+++ b/scripts/common.py
@@ -0,0 +1,81 @@
+import sys
+from itertools import product
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import tarfile
+from io import BytesIO
+from test.utils import Variant
+from urllib.error import HTTPError
+from urllib.request import urlopen
+import json
+
+
+def get_latest_release() -> str:
+    """Return the version of the latest Austin release, without the "v" prefix.
+
+    The version is taken from the ``tag_name`` field returned by the GitHub
+    releases API.
+    """
+    with urlopen(
+        "https://api.github.com/repos/p403n1x87/austin/releases/latest"
+    ) as stream:
+        tag = json.loads(stream.read().decode("utf-8"))["tag_name"]
+
+    # Drop only the leading "v": str.strip would also remove a trailing one.
+    return tag.removeprefix("v")
+
+
+def download_release(version: str, dest: Path, variant_name: str = "austin") -> Variant:
+    """Return the Austin variant for the given version, downloading it if needed.
+
+    The special version ``"dev"`` maps to the binary under ``src/`` and does not
+    use ``dest``. Any other version is looked up in ``dest / version`` and, when
+    missing, extracted there from the matching GitHub release archive.
+
+    Raises ``RuntimeError`` if no release archive could be downloaded.
+    """
+    if version == "dev":
+        return Variant(f"src/{variant_name}")
+
+    binary_dest = dest / version
+    binary = binary_dest / variant_name
+
+    if not binary.exists():
+        prefix = "https://github.com/p403n1x87/austin/releases/download/"
+        # Try each archive naming scheme in turn until one can be downloaded.
+        for flavour, v in product(("-gnu", ""), ("", "v")):
+            try:
+                with urlopen(
+                    f"{prefix}v{version}/{variant_name}-{v}{version}{flavour}-linux-amd64.tar.xz"
+                ) as stream:
+                    buffer = BytesIO(stream.read())
+            except HTTPError:
+                continue
+
+            binary_dest.mkdir(parents=True, exist_ok=True)
+            # Close the archive deterministically once the binary is extracted.
+            with tarfile.open(fileobj=buffer, mode="r:xz") as tar:
+                tar.extract(variant_name, str(binary_dest))
+            break
+        else:
+            raise RuntimeError(f"Could not download Austin version {version}")
+
+    variant = Variant(str(binary))
+
+    out = variant("-V").stdout
+    assert f"{variant_name} {version}" in out, (f"{variant_name} {version}", out)
+
+    return variant
+
+
+def download_latest(dest: Path, variant_name: str = "austin") -> Variant:
+    """Return the latest released Austin variant, downloading it if needed."""
+    return download_release(get_latest_release(), dest, variant_name)
+
+
+def get_dev(variant_name: str = "austin") -> Variant:
+    """Return the development variant, i.e. the binary under ``src/``."""
+    # No destination is needed: the "dev" version is never downloaded.
+    return download_release("dev", None, variant_name)
diff --git a/scripts/requirements-val.txt b/scripts/requirements-val.txt
new file mode 100644
index 00000000..c6dd2b1e
--- /dev/null
+++ b/scripts/requirements-val.txt
@@ -0,0 +1,3 @@
+austin-python~=1.5
+numpy
+scipy
diff --git a/scripts/validation.py b/scripts/validation.py
new file mode 100644
index 00000000..f87927a9
--- /dev/null
+++ b/scripts/validation.py
@@ -0,0 +1,299 @@
+# Run as python3 scripts/validation.py from the repository root directory.
+# Ensure dependencies from requirements-val.txt are installed.
+
+from argparse import ArgumentParser
+from collections import Counter, namedtuple
+from io import BytesIO
+from itertools import chain
+from pathlib import Path
+import re
+import sys
+import typing as t
+
+import common
+
+import numpy as np
+from scipy.stats import f
+
+from austin.format.mojo import (
+    MojoFile,
+    MojoStack,
+    MojoFrameReference,
+    MojoMetric,
+    MojoFrame,
+)
+from test.utils import target
+
+Scenario = namedtuple("Scenario", ["title", "variant", "args"])
+
+SCENARIOS = [
+    Scenario(
+        "Wall time",
+        "austin",
+        (
+            "-i",
+            "1ms",
+            sys.executable,
+            target("target34.py"),
+        ),
+    ),
+    Scenario(
+        "CPU time",
+        "austin",
+        (
+            "-si",
+            "1ms",
+            sys.executable,
+            target("target34.py"),
+        ),
+    ),
+]
+
+
+class AustinFlameGraph(dict):
+    """Element of a free module over a ring.
+
+    Keys are collapsed stacks and values are their metrics. Stacks that are not
+    in the mapping count as zero, and entries that become zero are removed.
+    """
+
+    def __call__(self, x):
+        return self.get(x, 0)
+
+    def __add__(self, other):
+        m = self.__class__(self)
+        for k, v in other.items():
+            n = m.setdefault(k, v.__class__()) + v
+            if not n and k in m:
+                del m[k]
+                continue
+            m[k] = n
+        return m
+
+    def __mul__(self, other):
+        m = self.__class__(self)
+        for k, v in self.items():
+            n = v * other
+            if not n and k in m:
+                del m[k]
+                continue
+            m[k] = n
+        return m
+
+    def __rmul__(self, other):
+        return self.__mul__(other)
+
+    def __truediv__(self, other):
+        return self * (1 / other)
+
+    def __rtruediv__(self, other):
+        # Dividing a scalar by a flame graph is not defined, so let Python
+        # raise the usual TypeError for unsupported operands.
+        return NotImplemented
+
+    def __sub__(self, other):
+        return self + (-other)
+
+    def __neg__(self):
+        m = self.__class__(self)
+        for k, v in m.items():
+            m[k] = -v
+        return m
+
+    def supp(self):
+        return set(self.keys())
+
+    def to_list(self, domain: list) -> list:
+        return [self(v) for v in domain]
+
+    @classmethod
+    def from_list(cls, stacks: t.List[t.Tuple[str, int]]) -> "AustinFlameGraph":
+        return sum((cls({stack: metric}) for stack, metric in stacks), cls())
+
+    @classmethod
+    def from_mojo(cls, data: bytes) -> "AustinFlameGraph":
+        """Build a flame graph from raw MOJO data.
+
+        Frame references and metrics are accumulated until the next stack
+        event (or the end of the data), at which point the sample is added to
+        the flame graph.
+        """
+        fg = cls()
+
+        stack: t.List[str] = []
+        metric = 0
+
+        def serialize(frame: MojoFrame) -> str:
+            return ":".join(
+                (
+                    frame.filename.string.value,
+                    frame.scope.string.value,
+                    str(frame.line),
+                    str(frame.line_end),
+                    str(frame.column),
+                    str(frame.column_end),
+                )
+            )
+
+        for e in MojoFile(BytesIO(data)).parse():
+            if isinstance(e, MojoStack):
+                if stack:
+                    fg += cls({";".join(stack): metric})
+                    stack.clear()
+                    metric = 0
+            elif isinstance(e, MojoFrameReference):
+                stack.append(serialize(e.frame))
+            elif isinstance(e, MojoMetric):
+                metric = e.value
+
+        # The last sample is not followed by a stack event: flush it here.
+        if stack:
+            fg += cls({";".join(stack): metric})
+
+        return fg
+
+
+def hotelling_two_sample_test(X, Y) -> float:
+    """Return the p-value of the two-sample Hotelling T^2 test.
+
+    X and Y are 2-dimensional arrays with one observation per row and one
+    variable per column, and must have the same number of columns.
+    """
+    nx, p = X.shape
+    ny, q = Y.shape
+
+    assert p == q, "X and Y must have the same dimensionality"
+
+    dof = nx + ny - p - 1
+
+    assert (
+        dof > 0
+    ), f"X ({nx}x{p}) and Y ({ny}x{q}) must have at least p ({p}) + 1 samples"
+
+    g = dof / p / (nx + ny - 2) * (nx * ny) / (nx + ny)
+
+    x_mean = np.mean(X, axis=0)
+    y_mean = np.mean(Y, axis=0)
+    delta = x_mean - y_mean
+
+    x_cov = np.cov(X, rowvar=False)
+    y_cov = np.cov(Y, rowvar=False)
+    # np.cov returns a 0-d array for a single variable, which cannot be inverted
+    pooled_cov = np.atleast_2d(((nx - 1) * x_cov + (ny - 1) * y_cov) / (nx + ny - 2))
+
+    # Compute the F statistic from the Hotelling T^2 statistic
+    statistic = g * delta.transpose() @ np.linalg.inv(pooled_cov) @ delta
+    f_dist = f(p, dof)
+
+    # The survival function is more accurate than 1 - cdf for small p-values
+    return f_dist.sf(statistic)
+
+
+def compare(
+    x: t.List[AustinFlameGraph],
+    y: t.List[AustinFlameGraph],
+    threshold: t.Optional[float] = None,
+) -> float:
+    """Return the p-value of the Hotelling test between two sets of flame graphs.
+
+    When ``threshold`` is given, only the stacks that appear in at least that
+    many flame graphs, counted across ``x`` and ``y`` together, are compared.
+    Otherwise every stack that appears in any flame graph is used.
+    """
+    if threshold is None:
+        domain = list(set().union(*(fg.supp() for fg in chain(x, y))))
+    else:
+        c = Counter()
+        for fg in chain(x, y):
+            c.update(fg.supp())
+        domain = sorted([k for k, v in c.items() if v >= threshold])
+
+    X = np.array([fg.to_list(domain) for fg in x], dtype=np.int32)
+    Y = np.array([fg.to_list(domain) for fg in y], dtype=np.int32)
+
+    return hotelling_two_sample_test(X, Y)
+
+
+def validate(args, variant: str = "austin", runs: int = 10, p: float = 0.05) -> bool:
+    """Check the development build against the latest release.
+
+    Both variants are run ``runs`` times with the given arguments, and the
+    collected flame graphs are compared. The validation passes when the
+    resulting p-value is greater than ``p``.
+    """
+    austin_latest = common.download_latest(dest=Path("/tmp"), variant_name=variant)
+    austin_dev = common.get_dev(variant_name=variant)
+
+    return (
+        compare(
+            *(
+                [
+                    AustinFlameGraph.from_mojo(
+                        austin(
+                            *args,
+                            mojo=True,
+                            convert=False,
+                        ).stdout
+                    )
+                    for _ in range(runs)
+                ]
+                for austin in (austin_latest, austin_dev)
+            ),
+            # Keep only the stacks seen in at least `runs` of the 2 * runs samples
+            threshold=runs,
+        )
+        > p
+    )
+
+
+if __name__ == "__main__":
+    argp = ArgumentParser()
+
+    argp.add_argument(
+        "-k",
+        type=re.compile,
+        help="Run validation scenarios that match the given regular expression",
+    )
+
+    argp.add_argument(
+        "-n",
+        type=int,
+        default=30,
+        help="Number of times to run each scenario",
+    )
+
+    # NOTE(review): the format is accepted but the output does not depend on it.
+    argp.add_argument(
+        "-f",
+        "--format",
+        type=str,
+        choices=["terminal", "markdown"],
+        default="terminal",
+        help="The output format",
+    )
+
+    opts = argp.parse_args()
+
+    # Honour the -k option, which restricts the scenarios to run
+    scenarios = [s for s in SCENARIOS if opts.k is None or opts.k.match(s.title)]
+
+    print("# Austin Data Validation\n")
+
+    failures: t.List[Scenario] = []
+    for scenario in scenarios:
+        print(
+            f"Validating {scenario.title} ... ",
+            end="\r",
+            flush=True,
+            file=sys.stderr,
+        )
+        if not validate(scenario.args, scenario.variant, runs=opts.n):
+            failures.append(scenario)
+
+    if failures:
+        print("💥 The following scenarios failed to validate:\n")
+
+        for scenario in failures:
+            print(f"- {scenario.title}")
+    else:
+        print(f"✨ 🍰 ✨ All {len(scenarios)} scenarios validated successfully!")
diff --git a/test/utils.py b/test/utils.py
index 7ef9b697..51a0ea8c 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -149,7 +149,7 @@ def __init__(self, name: str) -> None:
         self.ALL.append(self)
 
     def __call__(
-        self, *args: str, timeout: int = 60, mojo: bool = False
+        self, *args: str, timeout: int = 60, mojo: bool = False, convert: bool = True
     ) -> CompletedProcess:
         if not self.path.is_file():
             pytest.skip(f"Variant '{self}' not available")
@@ -168,7 +168,8 @@ def __call__(
         if mojo and not ({"-o", "-w", "--output", "--where"} & set(args)):
             # We produce MOJO binary data only if we are not writing to file
             # or using the "where" option.
-            result.stdout = demojo(result.stdout)
+            if convert:
+                result.stdout = demojo(result.stdout)
         else:
             result.stdout = result.stdout.decode(errors="ignore")
         result.stderr = result.stderr.decode()