Pascal API for streaming ASR (#1246)

k2-fsa · Aug 12, 2024 · 5791b69 · 5791b69
1 parent 65f1c0f
commit 5791b69
Show file tree

Hide file tree

Showing 16 changed files with 1,115 additions and 18 deletions.
diff --git a/.github/workflows/pascal.yaml b/.github/workflows/pascal.yaml
@@ -39,7 +39,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, macos-13]
+        os: [ubuntu-latest, macos-latest, macos-13, windows-latest]
 
     steps:
       - uses: actions/checkout@v4
@@ -64,10 +64,19 @@ jobs:
         run: |
           brew install fpc
           # brew install --cask lazarus
+          #
+      - name: Install Free pascal compiler (windows)
+        if: matrix.os == 'windows-latest'
+        shell: bash
+        run: |
+          choco install lazarus
+
+          ls -lh /c/lazarus/fpc/3.2.2/bin/x86_64-win64/
 
       - name: FPC info
         shell: bash
         run: |
+          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
           which fpc
           fpc -i
 
@@ -87,6 +96,7 @@ jobs:
           cd build
 
           cmake \
+            -DCMAKE_INSTALL_PREFIX=./install \
             -D BUILD_SHARED_LIBS=ON \
             -D SHERPA_ONNX_ENABLE_BINARY=OFF \
             -D CMAKE_BUILD_TYPE=Release \
@@ -98,15 +108,55 @@ jobs:
           export PATH="/usr/lib/ccache:/usr/local/opt/ccache/libexec:$PATH"
 
           cd build
-          make -j2 sherpa-onnx-c-api
+          cmake --build . --target install --config Release
+
+          ls -lh install/lib/
+
+          if [[ ${{ matrix.os }} == 'windows-latest' ]]; then
+            cp -v install/lib/*.dll ../pascal-api-examples/read-wav
+            cp -v install/lib/*.dll ../pascal-api-examples/streaming-asr
 
-      - name:  Run Pascal test
+            cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/read-wav
+            cp -v ../sherpa-onnx/pascal-api/sherpa_onnx.pas ../pascal-api-examples/streaming-asr
+          fi
+
+      - name:  Run Pascal test (Read wav test)
         shell: bash
         run: |
+          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+
           cd ./pascal-api-examples
 
-          echo "----read-wav test-----"
           pushd read-wav
           ./run.sh
+          echo "---"
+          ls -lh
+          popd
+
+      - name:  Run Pascal test (Streaming ASR)
+        shell: bash
+        run: |
+          export PATH=/c/lazarus/fpc/3.2.2/bin/x86_64-win64:$PATH
+
+          cd ./pascal-api-examples
+
+          pushd streaming-asr
+          ./run-zipformer-transducer.sh
+          rm -rf sherpa-onnx-*
+          echo "---"
+
+          if [[ ${{ matrix.os }} != 'windows-latest' ]]; then
+            ./run-paraformer.sh
+            rm -rf sherpa-onnx-*
+            echo "---"
+
+            ./run-zipformer-ctc.sh
+            echo "---"
+
+            ./run-zipformer-ctc-hlg.sh
+            rm -rf sherpa-onnx-*
+            echo "---"
+          fi
+
           ls -lh
           popd
diff --git a/java-api-examples/StreamingDecodeFileCtcHLG.java b/java-api-examples/StreamingDecodeFileCtcHLG.java
@@ -29,7 +29,7 @@ public static void main(String[] args) {
             .build();
 
     OnlineCtcFstDecoderConfig ctcFstDecoderConfig =
-        OnlineCtcFstDecoderConfig.builder().setGraph("hlg").build();
+        OnlineCtcFstDecoderConfig.builder().setGraph(hlg).build();
 
     OnlineRecognizerConfig config =
         OnlineRecognizerConfig.builder()

diff --git a/pascal-api-examples/README.md b/pascal-api-examples/README.md
@@ -0,0 +1,9 @@
+# Introduction
+
+This directory contains examples for how to use the [Object Pascal](https://en.wikipedia.org/wiki/Object_Pascal)
+APIs of [sherpa-onnx](https://github.com/k2-fsa/sherpa-onnx).
+
+|Directory| Description|
+|---------|------------|
+|[read-wav](./read-wav)|It shows how to read a wave file.|
+|[streaming-asr](./streaming-asr)| It shows how to use streaming models for speech recognition.|
diff --git a/pascal-api-examples/read-wav/run.sh b/pascal-api-examples/read-wav/run.sh
@@ -7,19 +7,19 @@ SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
 
 echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
 
-if [[ ! -f ../../build/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/lib/libsherpa-onnx-c-api.so ]]; then
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
   mkdir -p ../../build
   pushd ../../build
   cmake \
+    -DCMAKE_INSTALL_PREFIX=./install \
     -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
     -DSHERPA_ONNX_ENABLE_TESTS=OFF \
     -DSHERPA_ONNX_ENABLE_CHECK=OFF \
     -DBUILD_SHARED_LIBS=ON \
     -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
     ..
 
-  make -j4 sherpa-onnx-c-api
-  ls -lh lib
+  cmake --build . --target install --config Release
   popd
 fi
 
@@ -29,10 +29,10 @@ fi
 
 fpc \
   -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
-  -Fl$SHERPA_ONNX_DIR/build/lib \
+  -Fl$SHERPA_ONNX_DIR/build/install/lib \
   ./main.pas
 
-export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$LD_LIBRARY_PATH
-export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/lib:$DYLD_LIBRARY_PATH
+export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
 
 ./main
diff --git a/pascal-api-examples/streaming-asr/.gitignore b/pascal-api-examples/streaming-asr/.gitignore
@@ -0,0 +1,4 @@
+zipformer_transducer
+paraformer
+zipformer_ctc
+zipformer_ctc_hlg
diff --git a/pascal-api-examples/streaming-asr/README.md b/pascal-api-examples/streaming-asr/README.md
@@ -0,0 +1,11 @@
+# Introduction
+
+This folder contains examples of using sherpa-onnx's Object Pascal
+APIs with streaming models for speech recognition.
+
+|File|Description|
+|----|-----------|
+|[run-paraformer.sh](./run-paraformer.sh)|Use a streaming Paraformer model for speech recognition|
+|[run-zipformer-ctc-hlg.sh](./run-zipformer-ctc-hlg.sh)|Use a streaming Zipformer CTC model with HLG for speech recognition|
+|[run-zipformer-ctc.sh](./run-zipformer-ctc.sh)|Use a streaming Zipformer CTC model for speech recognition|
+|[run-zipformer-transducer.sh](./run-zipformer-transducer.sh)|Use a streaming Zipformer transducer model for speech recognition|
diff --git a/pascal-api-examples/streaming-asr/paraformer.pas b/pascal-api-examples/streaming-asr/paraformer.pas
@@ -0,0 +1,88 @@
+{ Copyright (c)  2024  Xiaomi Corporation }
+
+{
+This file shows how to use a streaming Paraformer model to decode files.
+
+You can download the model files from
+https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+}
+
+program paraformer;
+
+{$mode objfpc}
+
+uses
+  sherpa_onnx,
+  DateUtils,  { MilliSecondsBetween }
+  SysUtils;  { Now, Format, FreeAndNil }
+
+var
+  Config: TSherpaOnnxOnlineRecognizerConfig;
+  Recognizer: TSherpaOnnxOnlineRecognizer;
+  Stream: TSherpaOnnxOnlineStream;
+  RecognitionResult: TSherpaOnnxOnlineRecognizerResult;
+  Wave: TSherpaOnnxWave;
+  WaveFilename: AnsiString;
+  TailPaddings: array of Single;  { extra samples fed after the audio, see below }
+
+  Start: TDateTime;
+  Stop: TDateTime;
+
+  Elapsed: Single;  { time between Start and Stop, in seconds }
+  Duration: Single;  { length of the input audio, in seconds }
+  RealTimeFactor: Single;  { Elapsed divided by Duration }
+begin
+  Initialize(Config);  { presumably puts Config into a defined default state; confirm in sherpa_onnx.pas }
+
+  {Please visit https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models
+  to download model files used in this file.}
+  Config.ModelConfig.Paraformer.Encoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/encoder.int8.onnx';
+  Config.ModelConfig.Paraformer.Decoder := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/decoder.int8.onnx';
+  Config.ModelConfig.Tokens := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt';
+
+  Config.ModelConfig.Provider := 'cpu';
+  Config.ModelConfig.NumThreads := 1;
+  Config.ModelConfig.Debug := False;
+
+  WaveFilename := './sherpa-onnx-streaming-paraformer-bilingual-zh-en/test_wavs/2.wav';
+
+  Wave := SherpaOnnxReadWave(WaveFilename);  { provides Wave.Samples and Wave.SampleRate used below }
+
+  Recognizer := TSherpaOnnxOnlineRecognizer.Create(Config);
+
+  Start := Now;  { timing starts only after the recognizer has been created }
+
+  Stream := Recognizer.CreateStream();
+
+  Stream.AcceptWaveform(Wave.Samples, Wave.SampleRate);  { the whole file is fed in a single call }
+
+  SetLength(TailPaddings, Round(Wave.SampleRate * 0.5)); {0.5 seconds of padding; SetLength zero-fills new elements, i.e. silence}
+  Stream.AcceptWaveform(TailPaddings, Wave.SampleRate);
+
+  Stream.InputFinished();  { no more samples will be fed to this stream }
+
+  while Recognizer.IsReady(Stream) do  { keep decoding for as long as the recognizer reports the stream as ready }
+    Recognizer.Decode(Stream);
+
+  RecognitionResult := Recognizer.GetResult(Stream);
+
+  Stop := Now;
+
+  Elapsed := MilliSecondsBetween(Stop, Start) / 1000;  { whole milliseconds converted to seconds }
+  Duration := Length(Wave.Samples) / Wave.SampleRate;  { audio only; the tail padding is not counted }
+  RealTimeFactor := Elapsed / Duration;  { processing time per second of audio }
+
+  WriteLn(RecognitionResult.ToString);
+  WriteLn(Format('NumThreads %d', [Config.ModelConfig.NumThreads]));
+  WriteLn(Format('Elapsed %.3f s', [Elapsed]));
+  WriteLn(Format('Wave duration %.3f s', [Duration]));
+  WriteLn(Format('RTF = %.3f/%.3f = %.3f', [Elapsed, Duration, RealTimeFactor]));
+
+  {Free resources to avoid memory leak.
+
+  Note: You don't need to invoke them for this simple script.
+  However, you have to invoke them in your own large/complex project.
+  }
+  FreeAndNil(Stream);
+  FreeAndNil(Recognizer);
+end.
diff --git a/pascal-api-examples/streaming-asr/run-paraformer.sh b/pascal-api-examples/streaming-asr/run-paraformer.sh
@@ -0,0 +1,42 @@
+#!/usr/bin/env bash
+
+set -ex
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
+
+echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
+
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
+  mkdir -p ../../build
+  pushd ../../build
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=./install \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    ..
+
+  cmake --build . --target install --config Release
+  ls -lh lib
+  popd
+fi
+
+
+if [ ! -f ./sherpa-onnx-streaming-paraformer-bilingual-zh-en/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
+  tar xvf sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
+  rm sherpa-onnx-streaming-paraformer-bilingual-zh-en.tar.bz2
+fi
+
+fpc \
+  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
+  -Fl$SHERPA_ONNX_DIR/build/install/lib \
+  ./paraformer.pas
+
+export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
+
+./paraformer
diff --git a/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh b/pascal-api-examples/streaming-asr/run-zipformer-ctc-hlg.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -ex
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
+
+echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
+
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
+  mkdir -p ../../build
+  pushd ../../build
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=./install \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    ..
+
+  cmake --build . --target install --config Release
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+fi
+
+fpc \
+  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
+  -Fl$SHERPA_ONNX_DIR/build/install/lib \
+  ./zipformer_ctc_hlg.pas
+
+export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
+
+./zipformer_ctc_hlg
diff --git a/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh b/pascal-api-examples/streaming-asr/run-zipformer-ctc.sh
@@ -0,0 +1,41 @@
+#!/usr/bin/env bash
+
+set -ex
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+SHERPA_ONNX_DIR=$(cd $SCRIPT_DIR/../.. && pwd)
+
+echo "SHERPA_ONNX_DIR: $SHERPA_ONNX_DIR"
+
+if [[ ! -f ../../build/install/lib/libsherpa-onnx-c-api.dylib  && ! -f ../../build/install/lib/libsherpa-onnx-c-api.so && ! -f ../../build/install/lib/sherpa-onnx-c-api.dll ]]; then
+  mkdir -p ../../build
+  pushd ../../build
+  cmake \
+    -DCMAKE_INSTALL_PREFIX=./install \
+    -DSHERPA_ONNX_ENABLE_PYTHON=OFF \
+    -DSHERPA_ONNX_ENABLE_TESTS=OFF \
+    -DSHERPA_ONNX_ENABLE_CHECK=OFF \
+    -DBUILD_SHARED_LIBS=ON \
+    -DSHERPA_ONNX_ENABLE_PORTAUDIO=OFF \
+    ..
+
+  cmake --build . --target install --config Release
+  ls -lh lib
+  popd
+fi
+
+if [ ! -f ./sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt ]; then
+  curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  tar xvf sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+  rm sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18.tar.bz2
+fi
+
+fpc \
+  -Fu$SHERPA_ONNX_DIR/sherpa-onnx/pascal-api \
+  -Fl$SHERPA_ONNX_DIR/build/install/lib \
+  ./zipformer_ctc.pas
+
+export LD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$LD_LIBRARY_PATH
+export DYLD_LIBRARY_PATH=$SHERPA_ONNX_DIR/build/install/lib:$DYLD_LIBRARY_PATH
+
+./zipformer_ctc