diff --git a/.github/workflows/validation.yml b/.github/workflows/validation.yml
new file mode 100644
index 00000000..c2ba1aaf
--- /dev/null
+++ b/.github/workflows/validation.yml
@@ -0,0 +1,46 @@
+name: Data validation
+
+# Builds Austin from the checked-out sources, runs scripts/validation.py
+# against it and posts the script's output as a sticky comment on the PR.
+on:
+  pull_request:
+
+jobs:
+  validation:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: Install build dependencies
+        run: |
+          # Refresh the package index first so that the install does not fail
+          # on a stale index baked into the runner image.
+          sudo apt-get update
+          sudo apt-get -y install libunwind-dev binutils-dev libiberty-dev
+
+      - name: Install Python 3.10
+        uses: actions/setup-python@v4
+        with:
+          python-version: "3.10"
+
+      - name: Compile Austin
+        run: |
+          autoreconf --install
+          ./configure --enable-debug-symbols true
+          make
+
+      - name: Install runtime dependencies
+        run: |
+          python3.10 -m venv .venv
+          source .venv/bin/activate
+          pip install --upgrade pip
+          pip install -r scripts/requirements-val.txt
+          deactivate
+
+      - name: Run data validation
+        run: |
+          # The default shell does not enable pipefail, so without this the
+          # step status would be that of `tee` and a crash of the validation
+          # script would go unnoticed.
+          set -o pipefail
+          ulimit -c unlimited
+
+          source .venv/bin/activate
+          python scripts/validation.py --format markdown | tee comment.txt
+          deactivate
+
+      - name: Post results on PR
+        uses: marocchino/sticky-pull-request-comment@v2
+        with:
+          path: comment.txt
diff --git a/scripts/benchmark.py b/scripts/benchmark.py
index 5f7095a6..2f71e3ae 100644
--- a/scripts/benchmark.py
+++ b/scripts/benchmark.py
@@ -7,19 +7,15 @@
 from textwrap import wrap
 import typing as t
 from argparse import ArgumentParser
-from itertools import product
 from math import floor, log
 from pathlib import Path
 
 from scipy.stats import ttest_ind
 
-sys.path.insert(0, str(Path(__file__).parent.parent))
+from common import download_release
+
+from test.utils import metadata, target
 
-import tarfile
-from io import BytesIO
-from test.utils import Variant, metadata, target
-from urllib.error import HTTPError
-from urllib.request import urlopen
 
 VERSIONS = ("3.4.1", "3.5.0", "dev")
 SCENARIOS = [
@@ -99,38 +95,6 @@ def get_stats(output: str) -> t.Optional[dict]:
         return None
 
 
-def download_release(version: str, dest: Path, variant_name: str = "austin") -> Variant:
-    if version == "dev":
-        return Variant(f"src/{variant_name}")
-
-    binary_dest = dest / version
-    binary = binary_dest / variant_name
-
-    if not binary.exists():
-        prefix = "https://github.com/p403n1x87/austin/releases/download/"
-        for flavour, v in product({"-gnu", ""}, {"", "v"}):
-            try:
-                with urlopen(
-                    f"{prefix}v{version}/{variant_name}-{v}{version}{flavour}-linux-amd64.tar.xz"
-                ) as stream:
-                    buffer = BytesIO(stream.read())
-                    binary_dest.mkdir(parents=True, exist_ok=True)
-                    tar = tarfile.open(fileobj=buffer, mode="r:xz")
-                    tar.extract(variant_name, str(binary_dest))
-            except HTTPError:
-                continue
-            break
-        else:
-            raise RuntimeError(f"Could not download Austin version {version}")
-
-    variant = Variant(str(binary))
-
-    out = variant("-V").stdout
-    assert f"{variant_name} {version}" in out, (f"{variant_name} {version}", out)
-
-    return variant
-
-
 class Outcome:
     def __init__(self, data: list[float]) -> None:
         self.data = data
diff --git a/scripts/common.py b/scripts/common.py
new file mode 100644
index 00000000..e2ecff34
--- /dev/null
+++ b/scripts/common.py
@@ -0,0 +1,59 @@
+import sys
+from itertools import product
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+import tarfile
+from io import BytesIO
+from test.utils import Variant
+from urllib.error import HTTPError
+from urllib.request import urlopen
+import json
+
+
+def get_latest_release() -> str:
+    """Return the version of the latest Austin release on GitHub.
+
+    The version is the ``tag_name`` reported by the GitHub releases API with
+    the leading ``v`` removed.
+    """
+    with urlopen(
+        "https://api.github.com/repos/p403n1x87/austin/releases/latest"
+    ) as stream:
+        tag = json.loads(stream.read().decode("utf-8"))["tag_name"]
+
+    # Remove only the leading "v": str.strip("v") would also strip a trailing
+    # one, which is part of the version.
+    return tag[1:] if tag.startswith("v") else tag
+
+
+def download_release(version: str, dest: Path, variant_name: str = "austin") -> Variant:
+    """Return the Austin variant for the requested version.
+
+    The special version ``"dev"`` resolves to ``src/<variant_name>`` and does
+    not use ``dest``. Any other version is expected at
+    ``dest/<version>/<variant_name>``; when the binary is not there yet it is
+    downloaded from the GitHub release assets and extracted to that location.
+
+    Raises:
+        RuntimeError: if none of the candidate release assets can be fetched.
+        AssertionError: if ``-V`` does not report the expected version.
+    """
+    if version == "dev":
+        return Variant(f"src/{variant_name}")
+
+    binary_dest = dest / version
+    binary = binary_dest / variant_name
+
+    if not binary.exists():
+        prefix = "https://github.com/p403n1x87/austin/releases/download/"
+        # The exact asset name is not known upfront: try every combination of
+        # flavour suffix and version prefix, in a fixed order.
+        for flavour, v in product(("-gnu", ""), ("", "v")):
+            try:
+                with urlopen(
+                    f"{prefix}v{version}/{variant_name}-{v}{version}{flavour}-linux-amd64.tar.xz"
+                ) as stream:
+                    buffer = BytesIO(stream.read())
+            except HTTPError:
+                continue
+
+            binary_dest.mkdir(parents=True, exist_ok=True)
+            # Use a context manager so that the archive is always closed.
+            with tarfile.open(fileobj=buffer, mode="r:xz") as tar:
+                tar.extract(variant_name, str(binary_dest))
+            break
+        else:
+            raise RuntimeError(f"Could not download Austin version {version}")
+
+    variant = Variant(str(binary))
+
+    # Sanity check: the binary must identify itself as the requested version.
+    out = variant("-V").stdout
+    assert f"{variant_name} {version}" in out, (f"{variant_name} {version}", out)
+
+    return variant
+
+
+def download_latest(dest: Path, variant_name: str = "austin") -> Variant:
+    """Return the variant for the latest released version, stored under ``dest``."""
+    latest_version = get_latest_release()
+    return download_release(latest_version, dest, variant_name)
+
+
+def get_dev(variant_name: str = "austin") -> Variant:
+    """Return the variant for the ``"dev"`` version of ``variant_name``."""
+    # No destination directory is passed: the "dev" version is resolved by
+    # download_release without using it.
+    return download_release(version="dev", dest=None, variant_name=variant_name)
diff --git a/scripts/requirements-val.txt b/scripts/requirements-val.txt
new file mode 100644
index 00000000..c6dd2b1e
--- /dev/null
+++ b/scripts/requirements-val.txt
@@ -0,0 +1,3 @@
+austin-python~=1.5
+numpy
+scipy
diff --git a/scripts/validation.py b/scripts/validation.py
new file mode 100644
index 00000000..f87927a9
--- /dev/null
+++ b/scripts/validation.py
@@ -0,0 +1,259 @@
+# Run as python3 scripts/validation.py from the repository root directory.
+# Ensure dependencies from requirements-val.txt are installed.
+
+from argparse import ArgumentParser
+from collections import Counter, namedtuple
+from io import BytesIO
+from itertools import chain
+from pathlib import Path
+import re
+import sys
+import typing as t
+
+import common
+
+import numpy as np
+from scipy.stats import f
+
+from austin.format.mojo import (
+    MojoFile,
+    MojoStack,
+    MojoFrameReference,
+    MojoMetric,
+    MojoFrame,
+)
+from test.utils import target
+
+# A validation scenario: a human-readable title, the Austin variant to run and
+# the command-line arguments to pass to it.
+Scenario = namedtuple("Scenario", ["title", "variant", "args"])
+
+# The scenarios differ only in the sampling flag passed to Austin.
+SCENARIOS = [
+    Scenario(
+        title,
+        "austin",
+        (
+            mode,
+            "1ms",
+            sys.executable,
+            target("target34.py"),
+        ),
+    )
+    for title, mode in (("Wall time", "-i"), ("CPU time", "-si"))
+]
+
+
+class AustinFlameGraph(dict):
+    """Element of a free module over a ring.
+
+    The mapping goes from a collapsed stack (the basis element) to its metric
+    (the coefficient). Keys that are not in the mapping have an implicit
+    coefficient of 0, and the arithmetic operations do not store entries
+    whose coefficient is falsy.
+    """
+
+    def __call__(self, x):
+        """Return the coefficient of ``x``, or 0 if it is not in the support."""
+        return self.get(x, 0)
+
+    def __add__(self, other):
+        """Return the coefficient-wise sum with ``other`` as a new object."""
+        m = self.__class__(self)
+        for k, v in other.items():
+            n = m.get(k, v.__class__()) + v
+            if n:
+                m[k] = n
+            else:
+                # Coefficients that cancel out are removed from the support.
+                m.pop(k, None)
+        return m
+
+    def __mul__(self, other):
+        """Return a new object with every coefficient multiplied by ``other``."""
+        m = self.__class__()
+        for k, v in self.items():
+            n = v * other
+            if n:
+                m[k] = n
+        return m
+
+    def __rmul__(self, other):
+        """Scalar multiplication is commutative: delegate to ``__mul__``."""
+        return self.__mul__(other)
+
+    def __truediv__(self, other):
+        """Return a new object with every coefficient divided by ``other``."""
+        return self * (1 / other)
+
+    def __rtruediv__(self, other):
+        """Reject ``other / self``: division by a module element is undefined.
+
+        Returning ``NotImplemented`` makes Python raise a ``TypeError`` instead
+        of failing on a call to the non-existent ``__div__`` method.
+        """
+        return NotImplemented
+
+    def __sub__(self, other):
+        """Return the coefficient-wise difference with ``other``."""
+        return self + (-other)
+
+    def __neg__(self):
+        """Return a new object with every coefficient negated."""
+        return self.__class__({k: -v for k, v in self.items()})
+
+    def supp(self):
+        """Return the support, i.e. the set of keys stored in the mapping."""
+        return set(self.keys())
+
+    def to_list(self, domain: list) -> list:
+        """Return the coefficients of the elements of ``domain``, in order."""
+        return [self(v) for v in domain]
+
+    @classmethod
+    def from_list(cls, stacks: t.List[t.Tuple[str, int]]) -> "AustinFlameGraph":
+        """Build a flame graph by summing the given ``(stack, metric)`` pairs."""
+        return sum((cls({stack: metric}) for stack, metric in stacks), cls())
+
+    @classmethod
+    def from_mojo(cls, data: bytes) -> "AustinFlameGraph":
+        """Build a flame graph from raw MOJO data.
+
+        Every sample adds its metric to the key obtained by joining the
+        serialised frames of its stack with ``;``.
+        """
+        fg = cls()
+
+        stack: t.List[str] = []
+        metric = 0
+
+        def serialize(frame: MojoFrame) -> str:
+            return ":".join(
+                (
+                    frame.filename.string.value,
+                    frame.scope.string.value,
+                    str(frame.line),
+                    str(frame.line_end),
+                    str(frame.column),
+                    str(frame.column_end),
+                )
+            )
+
+        for e in MojoFile(BytesIO(data)).parse():
+            if isinstance(e, MojoStack):
+                # A new stack event closes the sample collected so far.
+                if stack:
+                    fg += cls({";".join(stack): metric})
+                stack.clear()
+                metric = 0
+            elif isinstance(e, MojoFrameReference):
+                stack.append(serialize(e.frame))
+            elif isinstance(e, MojoMetric):
+                metric = e.value
+
+        # The last sample is not followed by a stack event, so it has to be
+        # flushed explicitly or it would be lost.
+        if stack:
+            fg += cls({";".join(stack): metric})
+
+        return fg
+
+
+def hotelling_two_sample_test(X, Y) -> float:
+    """Return the p-value of Hotelling's two-sample T^2 test.
+
+    ``X`` and ``Y`` are 2-dimensional arrays with one observation per row and
+    one variable per column; they must have the same number of columns.
+
+    Raises:
+        AssertionError: if the dimensionalities differ or there are not
+            enough observations for the number of variables.
+    """
+    nx, p = X.shape
+    ny, q = Y.shape
+
+    assert p == q, "X and Y must have the same dimensionality"
+
+    dof = nx + ny - p - 1
+
+    assert (
+        dof > 0
+    ), f"X ({nx}x{p}) and Y ({ny}x{q}) must have at least p ({p}) + 1 samples"
+
+    # Factor that turns the quadratic form below into an F(p, dof) statistic.
+    g = dof / p / (nx + ny - 2) * (nx * ny) / (nx + ny)
+
+    x_mean = np.mean(X, axis=0)
+    y_mean = np.mean(Y, axis=0)
+    delta = x_mean - y_mean
+
+    # np.cov squeezes its result, so for a single variable it is 0-dimensional
+    # and could not be inverted: restore the (p, p) shape.
+    x_cov = np.atleast_2d(np.cov(X, rowvar=False))
+    y_cov = np.atleast_2d(np.cov(Y, rowvar=False))
+    pooled_cov = ((nx - 1) * x_cov + (ny - 1) * y_cov) / (nx + ny - 2)
+
+    # Compute the F statistic from the Hotelling T^2 statistic
+    statistic = g * delta.transpose() @ np.linalg.inv(pooled_cov) @ delta
+    f_dist = f(p, dof)
+
+    # The survival function is 1 - cdf, without the loss of precision that the
+    # subtraction has for small p-values.
+    return float(f_dist.sf(statistic))
+
+
+def compare(
+    x: t.List[AustinFlameGraph],
+    y: t.List[AustinFlameGraph],
+    threshold: t.Optional[float] = None,
+) -> float:
+    """Return the p-value of the Hotelling test between two groups of graphs.
+
+    The stacks to compare on are those in the support of any graph in ``x``
+    or ``y``. When ``threshold`` is given, only the stacks found in the
+    support of at least ``threshold`` graphs, counted over both groups
+    together, are used.
+    """
+    graphs = [*x, *y]
+
+    if threshold is None:
+        domain = list(set().union(*(graph.supp() for graph in graphs)))
+    else:
+        # Count in how many graphs each stack occurs.
+        occurrences = Counter()
+        for graph in graphs:
+            occurrences.update(graph.supp())
+        domain = sorted(
+            stack for stack, count in occurrences.items() if count >= threshold
+        )
+
+    def as_matrix(sample: t.List[AustinFlameGraph]):
+        # One row per flame graph, one column per stack in the domain.
+        return np.array([graph.to_list(domain) for graph in sample], dtype=np.int32)
+
+    X = as_matrix(x)
+    Y = as_matrix(y)
+
+    return hotelling_two_sample_test(X, Y)
+
+
+def validate(args, variant: str = "austin", runs: int = 10, p: float = 0.05) -> bool:
+    """Check the dev build of ``variant`` against the latest release.
+
+    Both binaries are run ``runs`` times with ``args`` and their MOJO output is
+    turned into flame graphs. Return whether the p-value of the comparison of
+    the two groups is greater than ``p``.
+    """
+    austin_latest = common.download_latest(dest=Path("/tmp"), variant_name=variant)
+    austin_dev = common.get_dev(variant_name=variant)
+
+    samples = []
+    for austin in (austin_latest, austin_dev):
+        graphs = []
+        for _ in range(runs):
+            # Ask for the raw MOJO bytes: they are parsed by from_mojo.
+            output = austin(*args, mojo=True, convert=False).stdout
+            graphs.append(AustinFlameGraph.from_mojo(output))
+        samples.append(graphs)
+
+    latest_graphs, dev_graphs = samples
+
+    # The threshold is applied to the count over both groups together
+    # (2 * runs graphs): keep the stacks seen in at least `runs` of them.
+    p_value = compare(latest_graphs, dev_graphs, threshold=runs)
+
+    return p_value > p
+
+
+if __name__ == "__main__":
+    argp = ArgumentParser()
+
+    argp.add_argument(
+        "-k",
+        type=re.compile,
+        help="Run validation scenarios that match the given regular expression",
+    )
+
+    argp.add_argument(
+        "-n",
+        type=int,
+        default=30,
+        help="Number of times to run each scenario",
+    )
+
+    # NOTE(review): the format is parsed but nothing below reads it yet; the
+    # report is printed the same way for both choices.
+    argp.add_argument(
+        "-f",
+        "--format",
+        type=str,
+        choices=["terminal", "markdown"],
+        default="terminal",
+        help="The output format",
+    )
+
+    opts = argp.parse_args()
+
+    # Honour -k: keep only the scenarios whose title matches the pattern.
+    scenarios = [
+        scenario
+        for scenario in SCENARIOS
+        if opts.k is None or opts.k.search(scenario.title)
+    ]
+
+    print("# Austin Data Validation\n")
+
+    failures: t.List[Scenario] = []
+    for scenario in scenarios:
+        # Progress goes to stderr so that it does not end up in the report.
+        print(
+            f"Validating {scenario.title} ...                                  ",
+            end="\r",
+            flush=True,
+            file=sys.stderr,
+        )
+        if not validate(scenario.args, scenario.variant, runs=opts.n):
+            failures.append(scenario)
+
+    if failures:
+        print("💥 The following scenarios failed to validate:\n")
+
+        for scenario in failures:
+            print(f"- {scenario.title}")
+    else:
+        print(f"✨ 🍰 ✨ All {len(scenarios)} scenarios validated successfully!")
diff --git a/test/utils.py b/test/utils.py
index 7ef9b697..51a0ea8c 100644
--- a/test/utils.py
+++ b/test/utils.py
@@ -149,7 +149,7 @@ def __init__(self, name: str) -> None:
         self.ALL.append(self)
 
     def __call__(
-        self, *args: str, timeout: int = 60, mojo: bool = False
+        self, *args: str, timeout: int = 60, mojo: bool = False, convert: bool = True
     ) -> CompletedProcess:
         if not self.path.is_file():
             pytest.skip(f"Variant '{self}' not available")
@@ -168,7 +168,8 @@ def __call__(
         if mojo and not ({"-o", "-w", "--output", "--where"} & set(args)):
             # We produce MOJO binary data only if we are not writing to file
             # or using the "where" option.
-            result.stdout = demojo(result.stdout)
+            if convert:
+                result.stdout = demojo(result.stdout)
         else:
             result.stdout = result.stdout.decode(errors="ignore")
         result.stderr = result.stderr.decode()