From 071ef0093185b88a841aa11c86e72a31b7cf1c1f Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 02:29:57 -0700
Subject: [PATCH 1/4] Add performance regression testing workflow

---
 .github/workflows/performance-regression.yml | 146 +++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 .github/workflows/performance-regression.yml

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
new file mode 100644
index 0000000..f8b4272
--- /dev/null
+++ b/.github/workflows/performance-regression.yml
@@ -0,0 +1,146 @@
+name: Performance Regression Detection
+on:
+  pull_request:
+    branches: [ master ]
+jobs:
+  detect-regression:
+    name: Detect Regression
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+    env:
+      CC: 'clang'
+      CXX: 'clang++'
+    steps:
+      - name: Install Dependencies
+        run: |
+          apt-get update
+          apt-get install git cmake clang-14 python3-pip -y
+          [[ -e /usr/bin/cmake ]] || ln -s `which cmake3` /usr/bin/cmake
+
+      - name: Get Data Generator
+        uses: actions/checkout@v2
+        with:
+          repository: amazon-ion/ion-data-generator
+          ref: main
+          path: ion-data-generator
+
+      - name: Build Ion Data Generator
+        run: cd ion-data-generator && mvn clean install
+
+      - name: Generate Data
+        env:
+          jar_file: ion-data-generator/target/ion-data-generator-1.0-SNAPSHOT.jar
+          schema_dir: ion-data-generator/tst/com/amazon/ion/workflow
+        run: |
+          mkdir -p testData
+          # Generate approximately 200KB of data for each dataset, so that we can expect similar orders of magnitude for
+          # our threshold.
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema01.isl testData/realWorldDataSchema01.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema02.isl testData/realWorldDataSchema02.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema03.isl testData/realWorldDataSchema03.10n
+
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedList.isl testData/nestedList.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedStruct.isl testData/nestedStruct.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/sexp.isl testData/sexp.10n
+
+      - name: Fetch PR Candidate
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          path: candidate
+
+      - name: Build PR Candidate
+        run: |
+          mkdir -p candidate/build/profiling && cd candidate/build/profiling
+          cmake -DCMAKE_BUILD_TYPE=Profiling -DIONC_BUILD_TESTS=OFF ../..
+          make clean && make IonCBench
+
+      - name: Fetch PR Baseline
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.base_ref }}
+          submodules: recursive
+          path: baseline
+
+      - name: Build PR Baseline
+        run: |
+          mkdir -p baseline/build/profiling && cd baseline/build/profiling
+          cmake -DCMAKE_BUILD_TYPE=Profiling -DIONC_BUILD_TESTS=OFF ../..
+          make clean && make IonCBench
+
+      # This step runs the benchmarks for the baseline build of ion-c (the PR's target branch).
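+      # For orientation: the --benchmark_out JSON written by the two benchmark runs below is roughly
+      # shaped as follows (an abbreviated, illustrative sketch; names and numbers are not from a real run):
+      #   {
+      #     "context":    { "uname": "Linux 6.2.0 x86_64", "proc": "...", ... },
+      #     "benchmarks": [
+      #       { "name": "deserialize_all/sexp.10n",      "run_type": "iteration", "cpu_time": 41.2, ... },
+      #       { "name": "deserialize_all/sexp.10n_mean", "run_type": "aggregate", "aggregate_name": "mean", ... }
+      #     ]
+      #   }
+      # The '*_mean' aggregate entries are what the 'Check Results' step filters on later.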
+      - name: 'Benchmark: Baseline'
+        env:
+          cli_path: baseline/build/profiling/tools/ion-bench/src/IonCBench
+        run: |
+          $cli_path -b deserialize_all -l ion-c-binary \
+            --benchmark_context=uname="`uname -srm`" \
+            --benchmark_context=proc="`cat /proc/cpuinfo | fgrep 'model name' | head -n 1 | cut -d: -f2 | cut -d' ' -f2-`" \
+            --benchmark_repetitions=20 \
+            --benchmark_out_format=json \
+            --benchmark_out='./baseline.json' \
+            --benchmark_min_warmup_time=5 \
+            -d testData/nestedStruct.10n \
+            -d testData/nestedList.10n \
+            -d testData/sexp.10n \
+            -d testData/realWorldDataSchema01.10n \
+            -d testData/realWorldDataSchema02.10n \
+            -d testData/realWorldDataSchema03.10n
+
+      # This step runs benchmarks on each of the generated datasets for the new revision. It does this through
+      # the 'compare.py' script provided by google-benchmark, which compares these results to the results of the
+      # baseline benchmarks from the previous step.
+      #
+      # The compare script uses the defined 'alpha' environment variable to perform a null-hypothesis test,
+      # which determines whether the two sets of benchmark times come from the same distribution.
+      - name: 'Benchmark: PR Candidate'
+        env:
+          compare: candidate/tools/ion-bench/deps/google-benchmark/tools/compare.py
+          cli_path: candidate/build/profiling/tools/ion-bench/src/IonCBench
+          alpha: 0.03
+        run: |
+          pip install -r candidate/tools/ion-bench/deps/google-benchmark/tools/requirements.txt
+          $compare -a -d ./results.json --alpha $alpha benchmarks \
+            ./baseline.json \
+            $cli_path -b deserialize_all -l ion-c-binary \
+            --benchmark_context=uname="`uname -srm`" \
+            --benchmark_context=proc="`cat /proc/cpuinfo | fgrep 'model name' | head -n 1 | cut -d: -f2 | cut -d' ' -f2-`" \
+            --benchmark_repetitions=20 \
+            --benchmark_out_format=json \
+            --benchmark_out='./candidate.json' \
+            --benchmark_min_warmup_time=5 \
+            -d testData/nestedStruct.10n \
+            -d testData/nestedList.10n \
+            -d testData/sexp.10n \
+            -d testData/realWorldDataSchema01.10n \
+            -d testData/realWorldDataSchema02.10n \
+            -d testData/realWorldDataSchema03.10n
+
+      # Upload the results.json for further review.
+      - name: 'Upload Results'
+        uses: actions/upload-artifact@v2
+        with:
+          name: results.json
+          path: ./results.json
+
+      # This step compares the two benchmark runs and attempts to determine whether they differ significantly
+      # enough to warrant a failure, so that someone at least looks at the results.
+      #
+      # Currently, this check looks at the generated comparison of the MEAN of each benchmark's CPU time. We
+      # do this for now, rather than use the null-hypothesis results, until we get a better understanding of
+      # how the timings behave in GHA.
+      - name: 'Check Results'
+        env:
+          # Threshold Percentage, currently 5%.
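+          # As a worked example (illustrative numbers only): compare.py reports relative differences, so a
+          # candidate mean CPU time of 2.10ms against a baseline mean of 2.00ms shows up in results.json as
+          # measurements[0].cpu = (2.10 - 2.00) / 2.00 = 0.05, i.e. a 5% regression, right at this threshold.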
+          threshold_perc: 5
+        run: |
+          echo "Printing Results"
+          RESULTS=$(cat results.json | jq '.[] | select(.run_type == "aggregate" and (.name | endswith("_mean"))) | {name:.name,cpu_time_perc_diff:(.measurements[0].cpu*100)}|select(.cpu_time_perc_diff > '"${threshold_perc}"')')
+          if [[ -z "$RESULTS" ]]; then
+            echo "No sizeable difference identified"
+          else
+            echo "CPU Time differences greater than ${threshold_perc}%"
+            echo "$RESULTS" | jq -r '"\(.name) = \(.cpu_time_perc_diff)"'
+            exit 1
+          fi
+

From d16073edbf7df9b4ad98e6f174bffad674290fa8 Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 03:28:52 -0700
Subject: [PATCH 2/4] Remove dependency installs from testing, GHA should have these installed already

---
 .github/workflows/performance-regression.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index f8b4272..cf2fb75 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -12,12 +12,6 @@ jobs:
       CC: 'clang'
       CXX: 'clang++'
     steps:
-      - name: Install Dependencies
-        run: |
-          apt-get update
-          apt-get install git cmake clang-14 python3-pip -y
-          [[ -e /usr/bin/cmake ]] || ln -s `which cmake3` /usr/bin/cmake
-
       - name: Get Data Generator
         uses: actions/checkout@v2
         with:

From e502337f48f881783b72336a441f34790f54efd3 Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 03:32:48 -0700
Subject: [PATCH 3/4] Add depth, and request tags, for code checkout

---
 .github/workflows/performance-regression.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index cf2fb75..8bc8843 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -43,6 +43,8 @@ jobs:
         with:
           submodules: recursive
           path: candidate
+          fetch-tags: true
+          fetch-depth: 50
 
       - name: Build PR Candidate
         run: |
@@ -56,6 +58,8 @@ jobs:
           ref: ${{ github.base_ref }}
           submodules: recursive
           path: baseline
+          fetch-tags: true
+          fetch-depth: 50
 
       - name: Build PR Baseline
         run: |

From ba8b3a22dac86ee8a0bdc45e2d6e40c656b5412e Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 13:35:52 -0700
Subject: [PATCH 4/4] Changes from PR feedback

---
 .github/workflows/performance-regression.yml | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index 8bc8843..f715a8a 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -2,6 +2,8 @@ name: Performance Regression Detection
 on:
   pull_request:
     branches: [ master ]
+    paths:
+      - 'ionc/*'
 jobs:
   detect-regression:
     name: Detect Regression
@@ -13,7 +15,7 @@ jobs:
       CXX: 'clang++'
     steps:
       - name: Get Data Generator
-        uses: actions/checkout@v2
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
         with:
           repository: amazon-ion/ion-data-generator
           ref: main
@@ -30,16 +32,12 @@ jobs:
           mkdir -p testData
           # Generate approximately 200KB of data for each dataset, so that we can expect similar orders of magnitude for
           # our threshold.
- java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema01.isl testData/realWorldDataSchema01.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema02.isl testData/realWorldDataSchema02.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema03.isl testData/realWorldDataSchema03.10n - - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedList.isl testData/nestedList.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedStruct.isl testData/nestedStruct.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/sexp.isl testData/sexp.10n + for test_name in realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03 nestedList nestedStruct sexp; do + java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/${test_name}.isl testData/${test_name}.10n + done - name: Fetch PR Candidate - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: submodules: recursive path: candidate @@ -53,7 +51,7 @@ jobs: make clean && make IonCBench - name: Fetch PR Baseline - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: ref: ${{ github.base_ref }} submodules: recursive
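
Reviewer note (not part of the patches above): the results.json artifact uploaded by the
'Upload Results' step can be inspected locally with the same filter the 'Check Results' step
applies. A minimal sketch, assuming the artifact has been downloaded to the working directory
and jq is installed; threshold_perc is whatever percentage you want to probe (5 mirrors the
workflow):

    # List every *_mean aggregate whose CPU-time difference exceeds the chosen percentage.
    threshold_perc=5
    jq '.[] | select(.run_type == "aggregate" and (.name | endswith("_mean")))
        | {name: .name, cpu_time_perc_diff: (.measurements[0].cpu*100)}
        | select(.cpu_time_perc_diff > '"${threshold_perc}"')' results.json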