Skip to content

Commit

Permalink
Add C++ runtime for MeloTTS (#1138)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Jul 16, 2024
1 parent 9548541 commit 960eb75
Show file tree
Hide file tree
Showing 51 changed files with 693 additions and 156 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/export-melo-tts-to-onnx.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -63,10 +63,16 @@ jobs:
echo "pwd: $PWD"
ls -lh ../scripts/melo-tts
rm -rf ./
cp -v ../scripts/melo-tts/*.onnx .
cp -v ../scripts/melo-tts/lexicon.txt .
cp -v ../scripts/melo-tts/tokens.txt .
cp -v ../scripts/melo-tts/README.md .
curl -SL -O https://raw.githubusercontent.com/myshell-ai/MeloTTS/main/LICENSE
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/new_heteronym.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
Expand All @@ -77,6 +83,10 @@ jobs:
git lfs track "*.onnx"
git add .
ls -lh
git status
git commit -m "add models"
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/vits-melo-tts-zh_en main || true
Expand Down
10 changes: 7 additions & 3 deletions .github/workflows/windows-x64-jni.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,14 @@ jobs:
cd build
cmake \
-A x64 \
-D CMAKE_BUILD_TYPE=Release \
-D BUILD_SHARED_LIBS=ON \
-DBUILD_SHARED_LIBS=ON \
-D SHERPA_ONNX_ENABLE_JNI=ON \
-D CMAKE_INSTALL_PREFIX=./install \
-DCMAKE_INSTALL_PREFIX=./install \
-DCMAKE_BUILD_TYPE=Release \
-DSHERPA_ONNX_ENABLE_WEBSOCKET=OFF \
-DBUILD_ESPEAK_NG_EXE=OFF \
-DSHERPA_ONNX_BUILD_C_API_EXAMPLES=OFF \
-DSHERPA_ONNX_ENABLE_BINARY=ON \
..
- name: Build sherpa-onnx for windows
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 1.10.16

* Support the Chinese + English (zh-en) TTS model from MeloTTS.

## 1.10.15

* Downgrade onnxruntime from v1.18.1 to v1.17.1
Expand Down
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ project(sherpa-onnx)
# ./nodejs-addon-examples
# ./dart-api-examples/
# ./CHANGELOG.md
set(SHERPA_ONNX_VERSION "1.10.15")
set(SHERPA_ONNX_VERSION "1.10.16")

# Disable warning about
#
Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/non-streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.15
sherpa_onnx: ^1.10.16
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/streaming-asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.15
sherpa_onnx: ^1.10.16
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ environment:

# Add regular dependencies here.
dependencies:
sherpa_onnx: ^1.10.15
sherpa_onnx: ^1.10.16
path: ^1.9.0
args: ^2.5.0

Expand Down
2 changes: 1 addition & 1 deletion dart-api-examples/vad/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ environment:
sdk: ^3.4.0

dependencies:
sherpa_onnx: ^1.10.15
sherpa_onnx: ^1.10.16
path: ^1.9.0
args: ^2.5.0

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/streaming_asr/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >
publish_to: 'none'

version: 1.10.14
version: 1.10.16

topics:
- speech-recognition
Expand All @@ -30,7 +30,7 @@ dependencies:
record: ^5.1.0
url_launcher: ^6.2.6

sherpa_onnx: ^1.10.15
sherpa_onnx: ^1.10.16
# sherpa_onnx:
# path: ../../flutter/sherpa_onnx

Expand Down
4 changes: 2 additions & 2 deletions flutter-examples/tts/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description: >
publish_to: 'none' # Remove this line if you wish to publish to pub.dev

version: 1.0.0
version: 1.10.16

environment:
sdk: '>=3.4.0 <4.0.0'
Expand All @@ -17,7 +17,7 @@ dependencies:
cupertino_icons: ^1.0.6
path_provider: ^2.1.3
path: ^1.9.0
sherpa_onnx: ^1.10.15
sherpa_onnx: ^1.10.16
url_launcher: ^6.2.6
audioplayers: ^5.0.0

Expand Down
12 changes: 6 additions & 6 deletions flutter/sherpa_onnx/pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
version: 1.10.15
version: 1.10.16

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand All @@ -30,19 +30,19 @@ dependencies:
flutter:
sdk: flutter

sherpa_onnx_android: ^1.10.15
sherpa_onnx_android: ^1.10.16
# path: ../sherpa_onnx_android

sherpa_onnx_macos: ^1.10.15
sherpa_onnx_macos: ^1.10.16
# path: ../sherpa_onnx_macos

sherpa_onnx_linux: ^1.10.15
sherpa_onnx_linux: ^1.10.16
# path: ../sherpa_onnx_linux
#
sherpa_onnx_windows: ^1.10.15
sherpa_onnx_windows: ^1.10.16
# path: ../sherpa_onnx_windows

sherpa_onnx_ios: ^1.10.15
sherpa_onnx_ios: ^1.10.16
# sherpa_onnx_ios:
# path: ../sherpa_onnx_ios

Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_ios/ios/sherpa_onnx_ios.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
# https://groups.google.com/g/dart-ffi/c/nUATMBy7r0c
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_ios'
s.version = '1.10.15'
s.version = '1.10.16'
s.summary = 'A new Flutter FFI plugin project.'
s.description = <<-DESC
A new Flutter FFI plugin project.
Expand Down
2 changes: 1 addition & 1 deletion flutter/sherpa_onnx_macos/macos/sherpa_onnx_macos.podspec
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
Pod::Spec.new do |s|
s.name = 'sherpa_onnx_macos'
s.version = '1.10.15'
s.version = '1.10.16'
s.summary = 'sherpa-onnx Flutter FFI plugin project.'
s.description = <<-DESC
sherpa-onnx Flutter FFI plugin project.
Expand Down
2 changes: 1 addition & 1 deletion nodejs-addon-examples/package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"dependencies": {
"sherpa-onnx-node": "^1.10.15"
"sherpa-onnx-node": "^1.10.16"
}
}
4 changes: 4 additions & 0 deletions scripts/apk/build-apk-tts-engine.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ sed -i.bak s/"lang = null"/"lang = \"$lang_iso_639_3\""/ ./TtsEngine.kt
git diff
popd

if [[ $model_dir == vits-melo-tts-zh_en ]]; then
lang=zh_en
fi

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build tts apk for $arch"
Expand Down
4 changes: 4 additions & 0 deletions scripts/apk/build-apk-tts.sh.in
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,10 @@ sed -i.bak s/"modelName = null"/"modelName = \"$model_name\""/ ./MainActivity.kt
git diff
popd

if [[ $model_dir == vits-melo-tts-zh_en ]]; then
lang=zh_en
fi

for arch in arm64-v8a armeabi-v7a x86_64 x86; do
log "------------------------------------------------------------"
log "build tts apk for $arch"
Expand Down
18 changes: 13 additions & 5 deletions scripts/apk/generate-tts-apk-script.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,11 @@ def get_vits_models() -> List[TtsModel]:
model_name="vits-zh-hf-fanchen-wnj.onnx",
lang="zh",
),
TtsModel(
model_dir="vits-melo-tts-zh_en",
model_name="model.onnx",
lang="zh",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-C",
model_name="vits-zh-hf-fanchen-C.onnx",
Expand Down Expand Up @@ -339,18 +344,21 @@ def get_vits_models() -> List[TtsModel]:
),
]

rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
rule_fsts = ["phone.fst", "date.fst", "number.fst"]
for m in chinese_models:
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
if "vits-zh-hf" in m.model_dir or "sherpa-onnx-vits-zh-ll" == m.model_dir:
if (
"vits-zh-hf" in m.model_dir
or "sherpa-onnx-vits-zh-ll" == m.model_dir
or "melo-tts" in m.model_dir
):
s = s[:-1]
m.dict_dir = m.model_dir + "/dict"
else:
m.rule_fars = f"{m.model_dir}/rule.far"

m.rule_fsts = ",".join(s)

if "vits-zh-hf" not in m.model_dir and "zh-ll" not in m.model_dir:
m.rule_fars = f"{m.model_dir}/rule.far"

all_models = chinese_models + [
TtsModel(
model_dir="vits-cantonese-hf-xiaomaiiwn",
Expand Down
2 changes: 1 addition & 1 deletion scripts/dart/sherpa-onnx-pubspec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ topics:
- voice-activity-detection

# remember to change the version in ../sherpa_onnx_macos/macos/sherpa_onnx.podspec
version: 1.10.15
version: 1.10.16

homepage: https://github.com/k2-fsa/sherpa-onnx

Expand Down
35 changes: 18 additions & 17 deletions scripts/flutter/generate-tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@

import jinja2

# pip install iso639-lang
from iso639 import Lang


def get_args():
parser = argparse.ArgumentParser()
Expand Down Expand Up @@ -37,13 +34,6 @@ class TtsModel:
data_dir: Optional[str] = None
dict_dir: Optional[str] = None
is_char: bool = False
lang_iso_639_3: str = ""


def convert_lang_to_iso_639_3(models: List[TtsModel]):
for m in models:
if m.lang_iso_639_3 == "":
m.lang_iso_639_3 = Lang(m.lang).pt3


def get_coqui_models() -> List[TtsModel]:
Expand Down Expand Up @@ -312,6 +302,11 @@ def get_vits_models() -> List[TtsModel]:
model_name="vits-zh-hf-fanchen-wnj.onnx",
lang="zh",
),
TtsModel(
model_dir="vits-melo-tts-zh_en",
model_name="model.onnx",
lang="zh_en",
),
TtsModel(
model_dir="vits-zh-hf-fanchen-C",
model_name="vits-zh-hf-fanchen-C.onnx",
Expand All @@ -332,26 +327,33 @@ def get_vits_models() -> List[TtsModel]:
model_name="vits-zh-hf-fanchen-unity.onnx",
lang="zh",
),
TtsModel(
model_dir="sherpa-onnx-vits-zh-ll",
model_name="model.onnx",
lang="zh",
),
]

rule_fsts = ["phone.fst", "date.fst", "number.fst", "new_heteronym.fst"]
rule_fsts = ["phone.fst", "date.fst", "number.fst"]
for m in chinese_models:
s = [f"{m.model_dir}/{r}" for r in rule_fsts]
if "vits-zh-hf" in m.model_dir:
if (
"vits-zh-hf" in m.model_dir
or "sherpa-onnx-vits-zh-ll" == m.model_dir
or "melo-tts" in m.model_dir
):
s = s[:-1]
m.dict_dir = m.model_dir + "/dict"
else:
m.rule_fars = f"{m.model_dir}/rule.far"

m.rule_fsts = ",".join(s)

if "vits-zh-hf" not in m.model_dir:
m.rule_fars = f"{m.model_dir}/rule.far"

all_models = chinese_models + [
TtsModel(
model_dir="vits-cantonese-hf-xiaomaiiwn",
model_name="vits-cantonese-hf-xiaomaiiwn.onnx",
lang="cantonese",
lang_iso_639_3="yue",
rule_fsts="vits-cantonese-hf-xiaomaiiwn/rule.fst",
),
# English (US)
Expand All @@ -374,7 +376,6 @@ def main():
all_model_list += get_piper_models()
all_model_list += get_mimic3_models()
all_model_list += get_coqui_models()
convert_lang_to_iso_639_3(all_model_list)

num_models = len(all_model_list)

Expand Down
6 changes: 6 additions & 0 deletions scripts/melo-tts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Introduction

Models in this directory are converted from
https://github.com/myshell-ai/MeloTTS

Note that the model contains only a single female speaker.
4 changes: 3 additions & 1 deletion scripts/melo-tts/export-onnx.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from melo.text.chinese import pinyin_to_symbol_map
from melo.text.english import eng_dict, refine_syllables
from pypinyin import Style, lazy_pinyin, phrases_dict, pinyin_dict
from melo.text.symbols import language_tone_start_map

for k, v in pinyin_to_symbol_map.items():
if isinstance(v, list):
Expand Down Expand Up @@ -82,6 +81,7 @@ def generate_tokens(symbol_list):
def generate_lexicon():
word_dict = pinyin_dict.pinyin_dict
phrases = phrases_dict.phrases_dict
eng_dict["kaldi"] = [["K", "AH0"], ["L", "D", "IH0"]]
with open("lexicon.txt", "w", encoding="utf-8") as f:
for word in eng_dict:
phones, tones = refine_syllables(eng_dict[word])
Expand Down Expand Up @@ -237,9 +237,11 @@ def main():
meta_data = {
"model_type": "melo-vits",
"comment": "melo",
"version": 2,
"language": "Chinese + English",
"add_blank": int(model.hps.data.add_blank),
"n_speakers": 1,
"jieba": 1,
"sample_rate": model.hps.data.sampling_rate,
"bert_dim": 1024,
"ja_bert_dim": 768,
Expand Down
2 changes: 1 addition & 1 deletion scripts/melo-tts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ function install() {
cd MeloTTS
pip install -r ./requirements.txt

pip install soundfile onnx onnxruntime
pip install soundfile onnx==1.15.0 onnxruntime==1.16.3

python3 -m unidic download
popd
Expand Down
Loading

0 comments on commit 960eb75

Please sign in to comment.