From e8ded67b988f6a1f053d3e9d7e7b56d041a33c09 Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Sat, 20 May 2023 18:39:48 +0800 Subject: [PATCH 1/4] Update RESULTS.md --- egs/librispeech/ASR/RESULTS.md | 78 ++++++++++++++++++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md index ed456a6171..94950a69af 100644 --- a/egs/librispeech/ASR/RESULTS.md +++ b/egs/librispeech/ASR/RESULTS.md @@ -244,6 +244,84 @@ for m in greedy_search modified_beam_search fast_beam_search; do done ``` +### pruned_transducer_stateless7 (Zipformer Adaption) + +See for more details. + +[pruned_transducer_stateless7](./pruned_transducer_stateless7) + +The tensorboard log can be found at + + +You can find a pretrained model, training logs, decoding logs, and decoding +results at: + + +You can use to deploy it. + +Number of model parameters: 70369391, i.e., 70.37 M + +| decoding method | dev | test | test-clean | test-other | comment | +|----------------------|------------|------------|------------|------------|--------------------| +| greedy_search | 14.27 | 14.22 | 2.08 | 4.79 | --epoch 20 --avg 5 | +| modified_beam_search | 14.22 | 14.08 | 2.06 | 4.72 | --epoch 20 --avg 5 | +| fast_beam_search | 14.23 | 14.17 | 2.08 | 4.09 | --epoch 20 --avg 5 | + +The training commands are: +```bash +export CUDA_VISIBLE_DEVICES="0,1" + +./pruned_transducer_stateless7/finetune.py \ + --world-size 2 \ + --num-epochs 20 \ + --start-epoch 1 \ + --exp-dir pruned_transducer_stateless7/exp_giga_finetune \ + --subset S \ + --use-fp16 1 \ + --base-lr 0.005 \ + --lr-epochs 100 \ + --lr-batches 100000 \ + --bpe-model icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11/data/lang_bpe_500/bpe.model \ + --do-finetune True \ + --use-mux True \ + --finetune-ckpt icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11//exp/pretrain.pt \ + --max-duration 500 +``` + +The decoding commands are: 
+```bash +# greedy_search +./pruned_transducer_stateless7/decode.py \ + --epoch 20 \ + --avg 5 \ + --use-averaged-model 1 \ + --exp-dir ./pruned_transducer_stateless7/exp \ + --max-duration 600 \ + --decoding-method greedy_search + +# modified_beam_search +./pruned_transducer_stateless7/decode.py \ + --epoch 20 \ + --avg 5 \ + --use-averaged-model 1 \ + --exp-dir ./pruned_transducer_stateless7/exp \ + --max-duration 600 \ + --decoding-method modified_beam_search \ + --beam-size 4 + +# fast_beam_search +./pruned_transducer_stateless7/decode.py \ + --epoch 20 \ + --avg 5 \ + --use-averaged-model 1 \ + --exp-dir ./pruned_transducer_stateless7/exp \ + --max-duration 600 \ + --decoding-method fast_beam_search \ + --beam 20.0 \ + --max-contexts 8 \ + --max-states 64 +``` + ### pruned_transducer_stateless7 (zipformer + multidataset(LibriSpeech + GigaSpeech + CommonVoice 13.0)) See for more details. From 8aaa458c5a3b31555b013cf23b3ee4e240eda92f Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Sat, 20 May 2023 22:24:33 +0800 Subject: [PATCH 2/4] Update RESULTS.md --- egs/librispeech/ASR/RESULTS.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md index 94950a69af..308ae9af2b 100644 --- a/egs/librispeech/ASR/RESULTS.md +++ b/egs/librispeech/ASR/RESULTS.md @@ -244,7 +244,7 @@ for m in greedy_search modified_beam_search fast_beam_search; do done ``` -### pruned_transducer_stateless7 (Zipformer Adaption) +### pruned_transducer_stateless7 (Fine-tune with mux) See for more details. @@ -255,7 +255,7 @@ The tensorboard log can be found at You can find a pretrained model, training logs, decoding logs, and decoding results at: - + You can use to deploy it. 
From 1d71a286452b91156f25aa2d5269082b425c11a0 Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Mon, 22 May 2023 10:32:31 +0800 Subject: [PATCH 3/4] Update RESULTS.md --- egs/librispeech/ASR/RESULTS.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md index 308ae9af2b..db83950ccb 100644 --- a/egs/librispeech/ASR/RESULTS.md +++ b/egs/librispeech/ASR/RESULTS.md @@ -295,7 +295,7 @@ The decoding commands are: --epoch 20 \ --avg 5 \ --use-averaged-model 1 \ - --exp-dir ./pruned_transducer_stateless7/exp \ + --exp-dir ./pruned_transducer_stateless7/exp_giga_finetune \ --max-duration 600 \ --decoding-method greedy_search @@ -304,7 +304,7 @@ The decoding commands are: --epoch 20 \ --avg 5 \ --use-averaged-model 1 \ - --exp-dir ./pruned_transducer_stateless7/exp \ + --exp-dir ./pruned_transducer_stateless7/exp_giga_finetune \ --max-duration 600 \ --decoding-method modified_beam_search \ --beam-size 4 @@ -314,7 +314,7 @@ The decoding commands are: --epoch 20 \ --avg 5 \ --use-averaged-model 1 \ - --exp-dir ./pruned_transducer_stateless7/exp \ + --exp-dir ./pruned_transducer_stateless7/exp_giga_finetune \ --max-duration 600 \ --decoding-method fast_beam_search \ --beam 20.0 \ From 93d93b424bc5cad18b13d65e304485e0de7594dd Mon Sep 17 00:00:00 2001 From: Yifan Yang <64255737+yfyeung@users.noreply.github.com> Date: Mon, 22 May 2023 11:56:45 +0800 Subject: [PATCH 4/4] Update RESULTS.md --- egs/librispeech/ASR/RESULTS.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/egs/librispeech/ASR/RESULTS.md b/egs/librispeech/ASR/RESULTS.md index db83950ccb..28361afdd8 100644 --- a/egs/librispeech/ASR/RESULTS.md +++ b/egs/librispeech/ASR/RESULTS.md @@ -253,7 +253,10 @@ See for more details. 
The tensorboard log can be found at -You can find a pretrained model, training logs, decoding logs, and decoding +You can find the pretrained model and BPE model needed for fine-tuning at: + + +You can find a fine-tuned model, fine-tuning logs, decoding logs, and decoding results at: @@ -284,7 +287,7 @@ export CUDA_VISIBLE_DEVICES="0,1" --bpe-model icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11/data/lang_bpe_500/bpe.model \ --do-finetune True \ --use-mux True \ - --finetune-ckpt icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11//exp/pretrain.pt \ + --finetune-ckpt icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11/exp/pretrain.pt \ --max-duration 500 ```