Skip to content

Commit

Permalink
Add Flush to VAD so that the last segment can be detected. (#1099)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jul 9, 2024
1 parent 3e4307e commit c2cc9de
Show file tree
Hide file tree
Showing 35 changed files with 237 additions and 29 deletions.
14 changes: 8 additions & 6 deletions .github/workflows/dot-net.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,6 @@ jobs:
cmake --build . --target install --config Release
rm -rf install/pkgconfig
- uses: actions/upload-artifact@v4
with:
name: windows-${{ matrix.arch }}
path: ./build/install/lib/

- name: Create tar file
shell: bash
run: |
Expand All @@ -72,6 +67,11 @@ jobs:
ls -lh *.tar.bz2
mv *.tar.bz2 ../
- uses: actions/upload-artifact@v4
with:
name: windows-${{ matrix.arch }}
path: ./*.tar.bz2

# https://huggingface.co/docs/hub/spaces-github-actions
- name: Publish to huggingface
if: (github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa') && (github.event_name == 'push' || github.event_name == 'workflow_dispatch')
Expand All @@ -88,7 +88,9 @@ jobs:
rm -rf huggingface
export GIT_CLONE_PROTECTION_ACTIVE=false
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
export GIT_LFS_SKIP_SMUDGE=1
git clone https://huggingface.co/csukuangfj/sherpa-onnx-libs huggingface
cd huggingface
mkdir -p windows-for-dotnet
Expand Down
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
## 1.10.12

* Add Flush to VAD so that the last speech segment can be detected. See also
https://github.com/k2-fsa/sherpa-onnx/discussions/1077#discussioncomment-9979740

## 1.10.11

* Support the iOS platform for Flutter.
Expand Down
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ project(sherpa-onnx)
# Remember to update
# ./nodejs-addon-examples
# ./dart-api-examples/
# ./sherpa-onnx/flutter/CHANGELOG.md
set(SHERPA_ONNX_VERSION "1.10.11")
# ./CHANGELOG.md
set(SHERPA_ONNX_VERSION "1.10.12")

# Disable warning about
#
Expand Down
22 changes: 22 additions & 0 deletions dart-api-examples/non-streaming-asr/bin/vad-with-paraformer.dart
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,28 @@ void main(List<String> arguments) async {
}
}

vad.flush();
while (!vad.isEmpty()) {
final stream = recognizer.createStream();
final segment = vad.front();
stream.acceptWaveform(
samples: segment.samples, sampleRate: waveData.sampleRate);
recognizer.decode(stream);

final result = recognizer.getResult(stream);

final startTime = segment.start * 1.0 / waveData.sampleRate;
final duration = segment.samples.length * 1.0 / waveData.sampleRate;
final stopTime = startTime + duration;
if (result.text != '') {
print(
'${startTime.toStringAsPrecision(4)} -- ${stopTime.toStringAsPrecision(4)}: ${result.text}');
}

stream.free();
vad.pop();
}

vad.free();
recognizer.free();
}
2 changes: 1 addition & 1 deletion dart-api-examples/non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.11
sherpa_onnx: ^1.10.12
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.11
sherpa_onnx: ^1.10.12
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.11
sherpa_onnx: ^1.10.12
path: ^1.9.0
args: ^2.5.0

Expand Down
6 changes: 6 additions & 0 deletions dart-api-examples/vad/bin/vad.dart
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ void main(List<String> arguments) async {
}
}

vad.flush();
while (!vad.isEmpty()) {
allSamples.add(vad.front().samples);
vad.pop();
}

vad.free();

final s = Float32List.fromList(allSamples.expand((x) => x).toList());
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.11
sherpa_onnx: ^1.10.12
path: ^1.9.0
args: ^2.5.0

Expand Down
20 changes: 20 additions & 0 deletions dotnet-examples/vad-non-streaming-asr-paraformer/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,26 @@ static void Main(string[] args)
}
}
}

vad.Flush();

while (!vad.IsEmpty()) {
SpeechSegment segment = vad.Front();
float startTime = segment.Start / (float)sampleRate;
float duration = segment.Samples.Length / (float)sampleRate;

OfflineStream stream = recognizer.CreateStream();
stream.AcceptWaveform(sampleRate, segment.Samples);
recognizer.Decode(stream);
String text = stream.Result.Text;

if (!String.IsNullOrEmpty(text)) {
Console.WriteLine("{0}--{1}: {2}", String.Format("{0:0.00}", startTime),
String.Format("{0:0.00}", startTime+duration), text);
}

vad.Pop();
}
}
}

4 changes: 2 additions & 2 deletions flutter-examples/streaming_asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >
publish_to: 'none'

version: 1.10.11
version: 1.10.12

topics:
- speech-recognition
Expand All @@ -30,7 +30,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6

sherpa_onnx: ^1.10.11
sherpa_onnx: ^1.10.12
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx

Expand Down
2 changes: 1 addition & 1 deletion flutter-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
sherpa_onnx: ^1.10.11
sherpa_onnx: ^1.10.12
url_launcher: ^6.2.6
audioplayers: ^5.0.0

Expand Down
13 changes: 13 additions & 0 deletions flutter/sherpa_onnx/lib/src/sherpa_onnx_bindings.dart
Original file line number Diff line number Diff line change
Expand Up @@ -491,6 +491,12 @@ typedef SherpaOnnxVoiceActivityDetectorResetNative = Void Function(
typedef SherpaOnnxVoiceActivityDetectorReset = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);

/// Native (C-side) signature of `SherpaOnnxVoiceActivityDetectorFlush`:
/// takes a pointer to the detector and returns nothing.
typedef SherpaOnnxVoiceActivityDetectorFlushNative = Void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);

/// Dart-side signature matching [SherpaOnnxVoiceActivityDetectorFlushNative];
/// this is the type the looked-up native symbol is converted to.
typedef SherpaOnnxVoiceActivityDetectorFlush = void Function(
Pointer<SherpaOnnxVoiceActivityDetector>);

typedef SherpaOnnxVoiceActivityDetectorFrontNative
= Pointer<SherpaOnnxSpeechSegment> Function(
Pointer<SherpaOnnxVoiceActivityDetector>);
Expand Down Expand Up @@ -779,6 +785,8 @@ class SherpaOnnxBindings {

static SherpaOnnxVoiceActivityDetectorReset? voiceActivityDetectorReset;

static SherpaOnnxVoiceActivityDetectorFlush? voiceActivityDetectorFlush;

static SherpaOnnxCreateCircularBuffer? createCircularBuffer;

static SherpaOnnxDestroyCircularBuffer? destroyCircularBuffer;
Expand Down Expand Up @@ -1036,6 +1044,11 @@ class SherpaOnnxBindings {
'SherpaOnnxVoiceActivityDetectorReset')
.asFunction();

voiceActivityDetectorFlush ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxVoiceActivityDetectorFlushNative>>(
'SherpaOnnxVoiceActivityDetectorFlush')
.asFunction();

createCircularBuffer ??= dynamicLibrary
.lookup<NativeFunction<SherpaOnnxCreateCircularBufferNative>>(
'SherpaOnnxCreateCircularBuffer')
Expand Down
4 changes: 4 additions & 0 deletions flutter/sherpa_onnx/lib/src/vad.dart
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,10 @@ class VoiceActivityDetector {
SherpaOnnxBindings.voiceActivityDetectorReset?.call(ptr);
}

/// Flushes the detector by forwarding [ptr] to the native
/// `SherpaOnnxVoiceActivityDetectorFlush` binding.
///
/// Call this after the last chunk of audio has been fed in so that the
/// final speech segment can be detected; then drain the remaining segments
/// with `isEmpty()` / `front()` / `pop()`.
///
/// The binding is nullable, so this is a silent no-op if it has not been
/// loaded yet.
void flush() {
SherpaOnnxBindings.voiceActivityDetectorFlush?.call(ptr);
}

Pointer<SherpaOnnxVoiceActivityDetector> ptr;
final VadModelConfig config;
}
12 changes: 6 additions & 6 deletions flutter/sherpa_onnx/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.10.11
version: 1.10.12

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand All @@ -30,19 +30,19 @@ dependencies:
flutter:
sdk: flutter

sherpa_onnx_android: ^1.10.11
sherpa_onnx_android: ^1.10.12
# path: ../sherpa_onnx_android

sherpa_onnx_macos: ^1.10.11
sherpa_onnx_macos: ^1.10.12
# path: ../sherpa_onnx_macos

sherpa_onnx_linux: ^1.10.11
sherpa_onnx_linux: ^1.10.12
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows: ^1.10.11
sherpa_onnx_windows: ^1.10.12
# path: ../sherpa_onnx_windows

sherpa_onnx_ios: ^1.10.11
sherpa_onnx_ios: ^1.10.12
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios

Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
s.version = '1.10.11'
s.version = '1.10.12'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
s.version = '1.10.11'
s.version = '1.10.12'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
Expand Down
19 changes: 19 additions & 0 deletions java-api-examples/VadNonStreamingParaformer.java
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,25 @@ public static void main(String[] args) {
}
}

vad.flush();
while (!vad.empty()) {
SpeechSegment segment = vad.front();
float startTime = segment.getStart() / 16000.0f;
float duration = segment.getSamples().length / 16000.0f;

OfflineStream stream = recognizer.createStream();
stream.acceptWaveform(segment.getSamples(), 16000);
recognizer.decode(stream);
String text = recognizer.getResult(stream).getText();
stream.release();

if (!text.isEmpty()) {
System.out.printf("%.3f--%.3f: %s\n", startTime, startTime + duration, text);
}

vad.pop();
}

vad.release();
recognizer.release();
}
Expand Down
10 changes: 10 additions & 0 deletions java-api-examples/VadRemoveSilence.java
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,16 @@ public static void main(String[] args) {
}
}

vad.flush();
while (!vad.empty()) {

// if you want to get the starting time of this segment, you can use
/* float startTime = vad.front().getStart() / 16000.0f; */

segments.add(vad.front().getSamples());
vad.pop();
}

// get total number of samples
int n = 0;
for (float[] s : segments) {
Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"dependencies": {
"sherpa-onnx-node": "^1.10.6"
"sherpa-onnx-node": "^1.10.12"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,12 @@ def main():
speech_samples.extend(vad.front.samples)
vad.pop()

vad.flush()

while not vad.empty():
speech_samples.extend(vad.front.samples)
vad.pop()

speech_samples = np.array(speech_samples, dtype=np.float32)

sf.write(args.output, speech_samples, samplerate=sample_rate)
Expand Down
2 changes: 1 addition & 1 deletion scripts/dart/sherpa-onnx-pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.10.6
version: 1.10.12

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand Down
7 changes: 7 additions & 0 deletions scripts/dotnet/VoiceActivityDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ public void Reset()
SherpaOnnxVoiceActivityDetectorReset(_handle.Handle);
}

/// <summary>
/// Flushes the detector by passing the wrapped native handle to
/// SherpaOnnxVoiceActivityDetectorFlush. Call it once all samples have been
/// fed in so that the last speech segment can be detected, then drain the
/// queue with IsEmpty()/Front()/Pop().
/// </summary>
public void Flush()
{
SherpaOnnxVoiceActivityDetectorFlush(_handle.Handle);
}

public void Dispose()
{
Cleanup();
Expand Down Expand Up @@ -106,5 +111,7 @@ private void Cleanup()
[DllImport(Dll.Filename)]
private static extern void SherpaOnnxVoiceActivityDetectorReset(IntPtr handle);

// P/Invoke declaration for the native SherpaOnnxVoiceActivityDetectorFlush
// function in the library named by Dll.Filename; takes the raw detector handle.
[DllImport(Dll.Filename)]
private static extern void SherpaOnnxVoiceActivityDetectorFlush(IntPtr handle);
}
}
4 changes: 4 additions & 0 deletions scripts/go/sherpa_onnx.go
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,10 @@ func (vad *VoiceActivityDetector) Reset() {
C.SherpaOnnxVoiceActivityDetectorReset(vad.impl)
}

// Flush forwards to the C function SherpaOnnxVoiceActivityDetectorFlush on the
// underlying detector. Call it after the last samples have been fed in so that
// the final speech segment can be detected.
func (vad *VoiceActivityDetector) Flush() {
C.SherpaOnnxVoiceActivityDetectorFlush(vad.impl)
}

// Spoken language identification

type SpokenLanguageIdentificationWhisperConfig struct {
Expand Down
Loading

0 comments on commit c2cc9de

Please sign in to comment.