From 071ef0093185b88a841aa11c86e72a31b7cf1c1f Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 02:29:57 -0700
Subject: [PATCH 1/4] Add performance regression testing workflow

---
 .github/workflows/performance-regression.yml | 146 +++++++++++++++++++
 1 file changed, 146 insertions(+)
 create mode 100644 .github/workflows/performance-regression.yml

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
new file mode 100644
index 0000000..f8b4272
--- /dev/null
+++ b/.github/workflows/performance-regression.yml
@@ -0,0 +1,146 @@
+name: Performance Regression Detection
+on:
+  pull_request:
+    branches: [ master ]
+jobs:
+  detect-regression:
+    name: Detect Regression
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: true
+    env:
+      CC: 'clang'
+      CXX: 'clang++'
+    steps:
+      - name: Install Dependencies
+        run: |
+          apt-get update
+          apt-get install git cmake clang-14 python3-pip -y
+          [[ -e /usr/bin/cmake ]] || ln -s `which cmake3` /usr/bin/cmake
+
+      - name: Get Data Generator
+        uses: actions/checkout@v2
+        with:
+          repository: amazon-ion/ion-data-generator
+          ref: main
+          path: ion-data-generator
+
+      - name: Build Ion Data Generator
+        run: cd ion-data-generator && mvn clean install
+
+      - name: Generate Data
+        env:
+          jar_file: ion-data-generator/target/ion-data-generator-1.0-SNAPSHOT.jar
+          schema_dir: ion-data-generator/tst/com/amazon/ion/workflow
+        run: |
+          mkdir -p testData
+          # Generate approximately 200KB of data for each dataset, so that we can expect similar orders of magnitude for
+          # our threshold.
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema01.isl testData/realWorldDataSchema01.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema02.isl testData/realWorldDataSchema02.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema03.isl testData/realWorldDataSchema03.10n
+
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedList.isl testData/nestedList.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedStruct.isl testData/nestedStruct.10n
+          java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/sexp.isl testData/sexp.10n
+
+      - name: Fetch PR Candidate
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+          path: candidate
+
+      - name: Build PR Candidate
+        run: |
+          mkdir -p candidate/build/profiling && cd candidate/build/profiling
+          cmake -DCMAKE_BUILD_TYPE=Profiling -DIONC_BUILD_TESTS=OFF ../..
+          make clean && make IonCBench
+
+      - name: Fetch PR Baseline
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.base_ref }}
+          submodules: recursive
+          path: baseline
+
+      - name: Build PR Baseline
+        run: |
+          mkdir -p baseline/build/profiling && cd baseline/build/profiling
+          cmake -DCMAKE_BUILD_TYPE=Profiling -DIONC_BUILD_TESTS=OFF ../..
+          make clean && make IonCBench
+
+      # This step runs the benchmarks for the baseline build of ion-c (the PR's target branch).
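+      # For orientation: the --benchmark_out JSON written by the two benchmark runs below is roughly
+      # shaped as follows (an abbreviated, illustrative sketch; names and numbers are not from a real run):
+      #   {
+      #     "context":    { "uname": "Linux 6.2.0 x86_64", "proc": "...", ... },
+      #     "benchmarks": [
+      #       { "name": "deserialize_all/sexp.10n",      "run_type": "iteration", "cpu_time": 41.2, ... },
+      #       { "name": "deserialize_all/sexp.10n_mean", "run_type": "aggregate", "aggregate_name": "mean", ... }
+      #     ]
+      #   }
+      # The '*_mean' aggregate entries are what the 'Check Results' step filters on later.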
+      - name: 'Benchmark: Baseline'
+        env:
+          cli_path: baseline/build/profiling/tools/ion-bench/src/IonCBench
+        run: |
+          $cli_path -b deserialize_all -l ion-c-binary \
+            --benchmark_context=uname="`uname -srm`" \
+            --benchmark_context=proc="`cat /proc/cpuinfo | fgrep 'model name' | head -n 1 | cut -d: -f2 | cut -d' ' -f2-`" \
+            --benchmark_repetitions=20 \
+            --benchmark_out_format=json \
+            --benchmark_out='./baseline.json' \
+            --benchmark_min_warmup_time=5 \
+            -d testData/nestedStruct.10n \
+            -d testData/nestedList.10n \
+            -d testData/sexp.10n \
+            -d testData/realWorldDataSchema01.10n \
+            -d testData/realWorldDataSchema02.10n \
+            -d testData/realWorldDataSchema03.10n
+
+      # This step runs benchmarks on each of the generated datasets for the new revision. It does this through
+      # the 'compare.py' script provided by google-benchmark, which compares these results to the results of the
+      # baseline benchmarks from the previous step.
+      #
+      # The compare script uses the defined 'alpha' environment variable to perform a null-hypothesis test,
+      # which determines whether the two sets of benchmark times come from the same distribution.
+      - name: 'Benchmark: PR Candidate'
+        env:
+          compare: candidate/tools/ion-bench/deps/google-benchmark/tools/compare.py
+          cli_path: candidate/build/profiling/tools/ion-bench/src/IonCBench
+          alpha: 0.03
+        run: |
+          pip install -r candidate/tools/ion-bench/deps/google-benchmark/tools/requirements.txt
+          $compare -a -d ./results.json --alpha $alpha benchmarks \
+            ./baseline.json \
+            $cli_path -b deserialize_all -l ion-c-binary \
+            --benchmark_context=uname="`uname -srm`" \
+            --benchmark_context=proc="`cat /proc/cpuinfo | fgrep 'model name' | head -n 1 | cut -d: -f2 | cut -d' ' -f2-`" \
+            --benchmark_repetitions=20 \
+            --benchmark_out_format=json \
+            --benchmark_out='./candidate.json' \
+            --benchmark_min_warmup_time=5 \
+            -d testData/nestedStruct.10n \
+            -d testData/nestedList.10n \
+            -d testData/sexp.10n \
+            -d testData/realWorldDataSchema01.10n \
+            -d testData/realWorldDataSchema02.10n \
+            -d testData/realWorldDataSchema03.10n
+
+      # Upload the results.json for further review.
+      - name: 'Upload Results'
+        uses: actions/upload-artifact@v2
+        with:
+          name: results.json
+          path: ./results.json
+
+      # This step compares the two benchmark runs and attempts to determine whether they differ significantly
+      # enough to warrant a failure, so that someone at least looks at the results.
+      #
+      # Currently, this check looks at the generated comparison of the MEAN of each benchmark's CPU time. We
+      # do this for now, rather than use the null-hypothesis results, until we get a better understanding of
+      # how the timings behave in GHA.
+      - name: 'Check Results'
+        env:
+          # Threshold Percentage, currently 5%.
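+          # As a worked example (illustrative numbers only): compare.py reports relative differences, so a
+          # candidate mean CPU time of 2.10ms against a baseline mean of 2.00ms shows up in results.json as
+          # measurements[0].cpu = (2.10 - 2.00) / 2.00 = 0.05, i.e. a 5% regression, right at this threshold.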
+          threshold_perc: 5
+        run: |
+          echo "Printing Results"
+          RESULTS=$(cat results.json | jq '.[] | select(.run_type == "aggregate" and (.name | endswith("_mean"))) | {name:.name,cpu_time_perc_diff:(.measurements[0].cpu*100)}|select(.cpu_time_perc_diff > '"${threshold_perc}"')')
+          if [[ -z "$RESULTS" ]]; then
+            echo "No sizeable difference identified"
+          else
+            echo "CPU Time differences greater than ${threshold_perc}%"
+            echo "$RESULTS" | jq -r '"\(.name) = \(.cpu_time_perc_diff)"'
+            exit 1
+          fi
+

From d16073edbf7df9b4ad98e6f174bffad674290fa8 Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 03:28:52 -0700
Subject: [PATCH 2/4] Remove dependency installs from testing, GHA should have these installed already

---
 .github/workflows/performance-regression.yml | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index f8b4272..cf2fb75 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -12,12 +12,6 @@ jobs:
       CC: 'clang'
       CXX: 'clang++'
     steps:
-      - name: Install Dependencies
-        run: |
-          apt-get update
-          apt-get install git cmake clang-14 python3-pip -y
-          [[ -e /usr/bin/cmake ]] || ln -s `which cmake3` /usr/bin/cmake
-
       - name: Get Data Generator
         uses: actions/checkout@v2
         with:

From e502337f48f881783b72336a441f34790f54efd3 Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 03:32:48 -0700
Subject: [PATCH 3/4] Add depth, and request tags, for code checkout

---
 .github/workflows/performance-regression.yml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index cf2fb75..8bc8843 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -43,6 +43,8 @@ jobs:
         with:
           submodules: recursive
           path: candidate
+          fetch-tags: true
+          fetch-depth: 50
 
       - name: Build PR Candidate
         run: |
@@ -56,6 +58,8 @@ jobs:
           ref: ${{ github.base_ref }}
           submodules: recursive
           path: baseline
+          fetch-tags: true
+          fetch-depth: 50
 
       - name: Build PR Baseline
         run: |

From ba8b3a22dac86ee8a0bdc45e2d6e40c656b5412e Mon Sep 17 00:00:00 2001
From: Richard Giliam
Date: Tue, 17 Oct 2023 13:35:52 -0700
Subject: [PATCH 4/4] Changes from PR feedback

---
 .github/workflows/performance-regression.yml | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/performance-regression.yml b/.github/workflows/performance-regression.yml
index 8bc8843..f715a8a 100644
--- a/.github/workflows/performance-regression.yml
+++ b/.github/workflows/performance-regression.yml
@@ -2,6 +2,8 @@ name: Performance Regression Detection
 on:
   pull_request:
     branches: [ master ]
+    paths:
+      - 'ionc/*'
 jobs:
   detect-regression:
     name: Detect Regression
@@ -13,7 +15,7 @@ jobs:
       CXX: 'clang++'
     steps:
       - name: Get Data Generator
-        uses: actions/checkout@v2
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11
         with:
           repository: amazon-ion/ion-data-generator
           ref: main
@@ -30,16 +32,12 @@ jobs:
           mkdir -p testData
           # Generate approximately 200KB of data for each dataset, so that we can expect similar orders of magnitude for
           # our threshold.
- java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema01.isl testData/realWorldDataSchema01.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema02.isl testData/realWorldDataSchema02.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/realWorldDataSchema03.isl testData/realWorldDataSchema03.10n - - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedList.isl testData/nestedList.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/nestedStruct.isl testData/nestedStruct.10n - java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/sexp.isl testData/sexp.10n + for test_name in realWorldDataSchema01 realWorldDataSchema02 realWorldDataSchema03 nestedList nestedStruct sexp; do + java -jar $jar_file generate -S 200000 --input-ion-schema $schema_dir/${test_name}.isl testData/${test_name}.10n + done - name: Fetch PR Candidate - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: submodules: recursive path: candidate @@ -53,7 +51,7 @@ jobs: make clean && make IonCBench - name: Fetch PR Baseline - uses: actions/checkout@v3 + uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 with: ref: ${{ github.base_ref }} submodules: recursive
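
Reviewer note (not part of the patches above): the results.json artifact uploaded by the
'Upload Results' step can be inspected locally with the same filter the 'Check Results' step
applies. A minimal sketch, assuming the artifact has been downloaded to the working directory
and jq is installed; threshold_perc is whatever percentage you want to probe (5 mirrors the
workflow):

    # List every *_mean aggregate whose CPU-time difference exceeds the chosen percentage.
    threshold_perc=5
    jq '.[] | select(.run_type == "aggregate" and (.name | endswith("_mean")))
        | {name: .name, cpu_time_perc_diff: (.measurements[0].cpu*100)}
        | select(.cpu_time_perc_diff > '"${threshold_perc}"')' results.json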