Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refresh-sample-testdata refactor #450

Merged
merged 3 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,17 @@ fix: $(FIXERS)

# END: lint-install ../bincapz

SAMPLES_REPO=https://github.com/chainguard-dev/bincapz-samples.git
SAMPLES_HASH=bdcb8c2e9bf557a0abe3e2b0144f437d456299b7
OUT_DIR=out/samples-$(SAMPLES_HASH).tmp
out/samples-$(SAMPLES_HASH):
SAMPLES_REPO ?= chainguard-dev/bincapz-samples
SAMPLES_COMMIT ?= bdcb8c2e9bf557a0abe3e2b0144f437d456299b7
OUT_DIR=out/samples-$(SAMPLES_COMMIT).tmp
out/samples-$(SAMPLES_COMMIT):
mkdir -p out
git clone $(SAMPLES_REPO) $(OUT_DIR)
git -C $(OUT_DIR) checkout $(SAMPLES_HASH)
git clone https://github.com/$(SAMPLES_REPO).git $(OUT_DIR)
git -C $(OUT_DIR) checkout $(SAMPLES_COMMIT)
find $(OUT_DIR) -name "*.xz" -execdir tar xJvf "{}" \;
mv $(OUT_DIR) $(basename $(OUT_DIR))

prepare-samples: out/samples-$(SAMPLES_HASH)
prepare-samples: out/samples-$(SAMPLES_COMMIT)
cp -a test_data/. $(basename $(OUT_DIR))

.PHONY: test
Expand Down Expand Up @@ -124,9 +124,8 @@ update-third-party:
./third_party/yara/update.sh

.PHONY: refresh-sample-testdata out/bincapz
refresh-sample-testdata: clone-samples out/bincapz
cp ./test_data/refresh-testdata.sh samples/
./out/samples/refresh-testdata.sh ./out/bincapz
refresh-sample-testdata: out/samples-$(SAMPLES_COMMIT) out/bincapz
./test_data/refresh-testdata.sh ./out/bincapz out/samples-$(SAMPLES_COMMIT)

ARCH ?= $(shell uname -m)
CRANE_VERSION=v0.20.2
Expand Down
184 changes: 99 additions & 85 deletions test_data/refresh-testdata.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,110 +2,124 @@
# refresh testdata with latest bincapz
#
# usage:
# ./refresh-testdata.sh </path/to/bincapz>
# ./refresh-testdata.sh </path/to/bincapz> </path/to/samples>
#
# NOTE: This is slow to run, so for small changes you are better
# off manually updating a single test file.

set -ux -o pipefail
set -eu -o pipefail

MAX_PROCS=${MAX_PROCS:=8}
readonly bincapz=$(realpath $1)
readonly root_dir=$(dirname $0)
cd "${root_dir}"
readonly samples=$(realpath $2)

# Resolve the repository root as the parent of the directory holding this
# script, then derive the test_data path from it. "$0" is quoted so a checkout
# path containing whitespace is not word-split before dirname sees it.
cd "$(dirname "$0")"
cd ..
readonly root_dir=$(pwd)
readonly test_data="${root_dir}/test_data"

if [[ -z "${bincapz}" ]]; then
echo "must pass location of bincapz"
exit 1
echo "must pass location of bincapz"
exit 1
fi

if [[ ! -x "${bincapz}" ]]; then
echo "bincapz at ${bincapz} is not executable"
exit 1
echo "bincapz at ${bincapz} is not executable"
exit 1
fi

# OCI edge case
${bincapz} --format=simple \
--min-risk any \
--min-file-risk any \
-o ../pkg/action/testdata/scan_oci \
../pkg/action/testdata/static.tar.xz; sed -i.bak 's|\.\.\/pkg\/action\/||g' ../pkg/action/testdata/scan_oci && rm ../pkg/action/testdata/scan_oci.bak &

# diffs don't follow an easy rule
${bincapz} --format=markdown \
-o ../test_data/macOS/2023.3CX/libffmpeg.dirty.mdiff \
diff \
macOS/2023.3CX/libffmpeg.dylib \
macOS/2023.3CX/libffmpeg.dirty.dylib &

${bincapz} --format=markdown \
-o ../test_data/macOS/clean/ls.mdiff \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &

${bincapz} --format=simple \
--min-level 2 \
--min-file-level 2 \
-o ../test_data/macOS/clean/ls.sdiff.level_2 \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &

${bincapz} --format=simple \
--min-level 1 \
--min-file-level 2 \
-o ../test_data/macOS/clean/ls.sdiff.trigger_2 \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &

${bincapz} --format=simple \
--min-level 1 \
--min-file-level 3 \
-o ../test_data/macOS/clean/ls.sdiff.trigger_3 \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls &
readonly qscript=$(mktemp)
# addq CMD [ARG...]
#
# Append one command line to the queue file named by ${qscript}. Each argument
# is shell-escaped with printf %q so that paths containing spaces or shell
# metacharacters survive being re-parsed when the queued line is executed
# later. Callers are expected to pass a plain argv (command plus arguments),
# not a string of shell syntax.
function addq() {
  # Nothing to queue: do not emit a line that would run an empty command.
  [[ $# -gt 0 ]] || return 0
  {
    printf '%q ' "$@"
    printf '\n'
  } >>"${qscript}"
}

# OCI edge case
cd "${root_dir}/pkg/action"
echo "regenerating test data, max_procs=${MAX_PROCS} ..."
${bincapz} --format=simple \
-o ../test_data/linux/2024.sbcl.market/sbcl.sdiff \
diff \
linux/2024.sbcl.market/sbcl.clean \
linux/2024.sbcl.market/sbcl.dirty &
--min-risk any \
--min-file-risk any \
-o testdata/scan_oci \
analyze testdata/static.tar.xz

${bincapz} --format=simple \
-o ../test_data/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff \
diff \
linux/2023.FreeDownloadManager/freedownloadmanager_clear_postinst \
linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst &
cd "${samples}"

${bincapz} --format=simple \
-o ../test_data/linux/clean/aws-c-io/aws-c-io.sdiff \
diff \
linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json \
linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json &
wait

for f in $(find * -name "*.simple"); do
prog=$(echo ${f} | sed s/\.simple$//g)
if [[ -f "${prog}" ]]; then
${bincapz} --format=simple -o "../test_data/${f}" scan "${prog}" &
fi
# diffs don't follow an easy rule
addq ${bincapz} --format=markdown \
-o "${test_data}/macOS/2023.3CX/libffmpeg.dirty.mdiff" \
diff \
macOS/2023.3CX/libffmpeg.dylib \
macOS/2023.3CX/libffmpeg.dirty.dylib

addq ${bincapz} --format=markdown \
-o "${test_data}/macOS/clean/ls.mdiff" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
--min-level 2 \
--min-file-level 2 \
-o "${test_data}/macOS/clean/ls.sdiff.level_2" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
--min-level 1 \
--min-file-level 2 \
-o "${test_data}/macOS/clean/ls.sdiff.trigger_2" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
--min-level 1 \
--min-file-level 3 \
-o "${test_data}/macOS/clean/ls.sdiff.trigger_3" \
diff \
linux/clean/ls.x86_64 \
macOS/clean/ls

addq ${bincapz} --format=simple \
-o "${test_data}/linux/2024.sbcl.market/sbcl.sdiff" \
diff \
linux/2024.sbcl.market/sbcl.clean \
linux/2024.sbcl.market/sbcl.dirty

addq ${bincapz} --format=simple \
-o "${test_data}/linux/2023.FreeDownloadManager/freedownloadmanager.sdiff" \
diff \
linux/2023.FreeDownloadManager/freedownloadmanager_clear_postinst \
linux/2023.FreeDownloadManager/freedownloadmanager_infected_postinst

addq ${bincapz} --format=simple \
-o "${test_data}/linux/clean/aws-c-io/aws-c-io.sdiff" \
diff \
linux/clean/aws-c-io/aws-c-io-0.14.10-r0.spdx.json \
linux/clean/aws-c-io/aws-c-io-0.14.11-r0.spdx.json

for f in $(find "${test_data}" -name "*.simple"); do
prog=$(echo $f | sed -e s#"${test_data}/"## -e s#\.simple\$##)
if [[ -f "${prog}" ]]; then
addq ${bincapz} --format=simple -o "${f}" analyze "${prog}"
fi
done
wait

for f in $(find * -name "*.md"); do
prog=$(echo ${f} | sed s/\.md$//g)
if [[ -f "${prog}" ]]; then
${bincapz} --format=markdown -o "../test_data/${f}" scan "${prog}" &
fi
for f in $(find "${test_data}" -name "*.md"); do
prog=$(echo $f | sed -e s#"${test_data}/"## -e s#\.md\$##)
if [[ -f "${prog}" ]]; then
addq ${bincapz} --format=markdown -o "${f}" analyze "${prog}"
fi
done
wait

for f in $(find * -name "*.json"); do
prog=$(echo ${f} | sed s/\.json$//g)
if [[ -f "${prog}" ]]; then
${bincapz} --format=json -o "../test_data/${f}" scan "${prog}" &
fi
for f in $(find "${test_data}" -name "*.json"); do
prog=$(echo $f | sed -e s#"${test_data}/"## -e s#\.json\$##)
if [[ -f "${prog}" ]]; then
addq ${bincapz} --format=json -o "${f}" analyze "${prog}"
fi
done
wait

# Report how many commands were queued; feeding the file to wc on stdin keeps
# the temp-file name out of the message.
echo "processing queue with length: $(wc -l <"${qscript}")"
# Run every queued line as its own `sh -c` command, at most MAX_PROCS at once.
# The line is passed as the trailing argument rather than through `-J%`, which
# only BSD xargs understands (GNU xargs rejects it). The -s guard prevents GNU
# xargs from invoking `sh -c` with no argument when nothing was queued.
if [[ -s "${qscript}" ]]; then
  tr '\n' '\0' <"${qscript}" | xargs -0 -n1 -P"${MAX_PROCS}" sh -c
fi
# The queue file came from mktemp; remove it once it has been consumed.
rm -f "${qscript}"
echo "test data regeneration complete!!"
Loading