From 1d86bd87fbf5fe89d083c8c40275d6d4ef74d411 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sat, 13 Apr 2024 12:09:09 +0200 Subject: [PATCH 1/7] Fix --split-max-size Byte size calculation was done on int and overflowed. --- examples/gguf-split/gguf-split.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/gguf-split/gguf-split.cpp b/examples/gguf-split/gguf-split.cpp index 24acbf02a4eed..39c75e0a7a802 100644 --- a/examples/gguf-split/gguf-split.cpp +++ b/examples/gguf-split/gguf-split.cpp @@ -59,10 +59,10 @@ static size_t split_str_to_n_bytes(std::string str) { int n; if (str.back() == 'M') { sscanf(str.c_str(), "%d", &n); - n_bytes = n * 1024 * 1024; // megabytes + n_bytes = (size_t)n * 1024 * 1024; // megabytes } else if (str.back() == 'G') { sscanf(str.c_str(), "%d", &n); - n_bytes = n * 1024 * 1024 * 1024; // gigabytes + n_bytes = (size_t)n * 1024 * 1024 * 1024; // gigabytes } else { throw std::invalid_argument("error: supported units are M (megabytes) or G (gigabytes), but got: " + std::string(1, str.back())); } From 6738215a10bd4d128357b264eba1382303ef26ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sat, 13 Apr 2024 16:01:34 +0200 Subject: [PATCH 2/7] add tests.sh --- examples/gguf-split/tests.sh | 81 ++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 examples/gguf-split/tests.sh diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh new file mode 100644 index 0000000000000..033c5f3858b21 --- /dev/null +++ b/examples/gguf-split/tests.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +set -eu + +if [ $# -lt 1 ] +then + echo "usage: $0 path_to_build_binary [path_to_temp_folder]" + echo "example: $0 ../../build/bin ../../tmp" + exit 1 +fi + +if [ $# -gt 1 ] +then + TMP_DIR=$2 +else + TMP_DIR=/tmp +fi + +set -x + +SPLIT=$1/gguf-split +MAIN=$1/main +WORK_PATH=$TMP_DIR +CUR_DIR=$(pwd) + +# 1. 
Get a model +( + cd $WORK_PATH + "$CUR_DIR"/../../scripts/hf.sh --repo ggml-org/gemma-1.1-2b-it-Q8_0-GGUF --file gemma-1.1-2b-it.Q8_0.gguf +) +echo PASS + +# 2. Split with max tensors strategy +$SPLIT --split-max-tensors 28 $WORK_PATH/gemma-1.1-2b-it.Q8_0.gguf $WORK_PATH/ggml-model-split +echo PASS +echo + +# 2b. Test the sharded model is loading properly +$MAIN --model $WORK_PATH/ggml-model-split-00001-of-00006.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# 3. Merge +$SPLIT --merge $WORK_PATH/ggml-model-split-00001-of-00006.gguf $WORK_PATH/ggml-model-merge.gguf +echo PASS +echo + +# 3b. Test the merged model is loading properly +$MAIN --model $WORK_PATH/ggml-model-merge.gguf --random-prompt --n-predict 32 +echo PASS +echo + +# 4. Split with no tensor in metadata +#$SPLIT --split-max-tensors 32 --no-tensor-in-metadata $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-32-tensors +#echo PASS +#echo + +# 4b. Test the sharded model is loading properly +#$MAIN --model $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf --random-prompt --n-predict 32 +#echo PASS +#echo + +# 5. Merge +#$SPLIT --merge $WORK_PATH/ggml-model-split-32-tensors-00001-of-00006.gguf $WORK_PATH/ggml-model-merge-2.gguf +#echo PASS +#echo + +# 5b. Test the merged model is loading properly +#$MAIN --model $WORK_PATH/ggml-model-merge-2.gguf --random-prompt --n-predict 32 +#echo PASS +#echo + +# 6. Split with size strategy +$SPLIT --split-max-size 2G $WORK_PATH/ggml-model-merge.gguf $WORK_PATH/ggml-model-split-2G +echo PASS +echo + +# 6b. Test the sharded model is loading properly +$MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32 +echo PASS +echo From d42add49d4530fd5547ac77f0313f8dae557c41b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sat, 13 Apr 2024 16:04:00 +0200 Subject: [PATCH 3/7] add examples test scripts to ci run Will autodiscover examples/*/tests.sh scripts and run them. 
--- ci/run.sh | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/ci/run.sh b/ci/run.sh index 19776b5f7c6ff..8521e1f074339 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -153,6 +153,52 @@ function gg_sum_ctest_release { gg_printf '```\n' } +# test_scripts_debug + +function gg_run_test_scripts_debug { + cd ${SRC} + + set -e + + (find ./examples -mindepth 2 -maxdepth 2 -name 'tests.sh' -execdir time bash "{}" "$SRC/build-ci-debug/bin" "$MNT/models" \;) 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_debug { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in debug mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + +# test_scripts_release + +function gg_run_test_scripts_release { + cd ${SRC} + + set -e + + (find ./examples -mindepth 2 -maxdepth 2 -name 'tests.sh' -execdir time bash "{}" "$SRC/build-ci-release/bin" "$MNT/models" \;) 2>&1 | tee -a $OUT/${ci}-scripts.log + + set +e +} + +function gg_sum_test_scripts_release { + gg_printf '### %s\n\n' "${ci}" + + gg_printf 'Runs test scripts in release mode\n' + gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" + gg_printf '```\n' + gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" + gg_printf '```\n' + gg_printf '\n' +} + function gg_get_model { local gguf_3b="$MNT/models/open-llama/3B-v2/ggml-model-f16.gguf" local gguf_7b="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" @@ -642,6 +688,9 @@ test $ret -eq 0 && gg_run ctest_release if [ -z ${GG_BUILD_LOW_PERF} ]; then test $ret -eq 0 && gg_run embd_bge_small + test $ret -eq 0 && gg_run test_scripts_debug + test $ret -eq 0 && gg_run test_scripts_release + if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then if [ -z ${GG_BUILD_CUDA} ]; then test $ret -eq 0 && gg_run open_llama_3b_v2 From 18ed9ed57f55627cdc51978ebb81210d017d86e3 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sat, 13 Apr 2024 22:22:29 +0200 Subject: [PATCH 4/7] move WORK_PATH to a subdirectory --- examples/gguf-split/tests.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh index 033c5f3858b21..d6079b497ecc8 100644 --- a/examples/gguf-split/tests.sh +++ b/examples/gguf-split/tests.sh @@ -20,9 +20,11 @@ set -x SPLIT=$1/gguf-split MAIN=$1/main -WORK_PATH=$TMP_DIR +WORK_PATH=$TMP_DIR/gguf-split CUR_DIR=$(pwd) +mkdir -p "$WORK_PATH" + # 1. Get a model ( cd $WORK_PATH From e53bc29c25a1740676e3de9acfe3e3dc554ed512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sun, 14 Apr 2024 11:04:02 +0200 Subject: [PATCH 5/7] clean up before and after test --- examples/gguf-split/tests.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/examples/gguf-split/tests.sh b/examples/gguf-split/tests.sh index d6079b497ecc8..879522f7e2512 100644 --- a/examples/gguf-split/tests.sh +++ b/examples/gguf-split/tests.sh @@ -25,6 +25,9 @@ CUR_DIR=$(pwd) mkdir -p "$WORK_PATH" +# Clean up in case of previously failed test +rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf + # 1. 
Get a model ( cd $WORK_PATH @@ -81,3 +84,6 @@ echo $MAIN --model $WORK_PATH/ggml-model-split-2G-00001-of-00002.gguf --random-prompt --n-predict 32 echo PASS echo + +# Clean up +rm -f $WORK_PATH/ggml-model-split*.gguf $WORK_PATH/ggml-model-merge*.gguf From 708a0b05165f366e24bb4f0a9141e4bcc7b08437 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sun, 14 Apr 2024 11:15:49 +0200 Subject: [PATCH 6/7] explicitly define which scripts to run --- ci/run.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/run.sh b/ci/run.sh index 8521e1f074339..085dfd42faf32 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -160,7 +160,7 @@ function gg_run_test_scripts_debug { set -e - (find ./examples -mindepth 2 -maxdepth 2 -name 'tests.sh' -execdir time bash "{}" "$SRC/build-ci-debug/bin" "$MNT/models" \;) 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log set +e } @@ -183,7 +183,7 @@ function gg_run_test_scripts_release { set -e - (find ./examples -mindepth 2 -maxdepth 2 -name 'tests.sh' -execdir time bash "{}" "$SRC/build-ci-release/bin" "$MNT/models" \;) 2>&1 | tee -a $OUT/${ci}-scripts.log + (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log set +e } From 650db0f25f28cdd1ceb53b57b052794283d12419 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Sun, 14 Apr 2024 12:00:05 +0200 Subject: [PATCH 7/7] add --split-max-size to readme --- examples/gguf-split/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/gguf-split/README.md b/examples/gguf-split/README.md index ddb1f76497aed..ad1d86651b46d 100644 --- a/examples/gguf-split/README.md +++ b/examples/gguf-split/README.md @@ -5,5 +5,6 @@ CLI to split / merge GGUF files. **Command line options:** - `--split`: split GGUF to multiple GGUF, default operation. 
+- `--split-max-size`: maximum size per split, given with an `M` (megabytes) or `G` (gigabytes) suffix, e.g. `500M` or `2G`. - `--split-max-tensors`: maximum tensors in each split: default(128) - `--merge`: merge multiple GGUF to a single GGUF.