Skip to content

Commit

Permalink
Merge pull request #63 from sensein/alistair/add_streamlit_dependency
Browse files — browse the repository at this point in the history
Remove TTS and convert voice feature
  • Loading branch information
alistairewj authored Jun 25, 2024
2 parents 25afa6a + 5d1c158 commit e3f0ceb
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 97 deletions.
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip setuptools
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install --no-cache-dir --editable=".[dev,tts]"
pip install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install --no-cache-dir --editable=".[dev]"
- name: Test with pytest
run: |
pytest tests
6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,12 @@ dependencies = [
"matplotlib>=3.8.3",
"click",
"pydra~=0.23",
"numpy",
"sentencepiece",
"transformers",
"accelerate",
"fhir.resources==7.1.0",
"streamlit",
"datasets[audio]"
]

Expand All @@ -45,9 +48,6 @@ doc = [
"jupytext",
"ipympl"
]
tts = [
"TTS; python_version < '3.12'"
]
dev = [
"b2aiprep[doc]",
"pytest",
Expand Down
37 changes: 0 additions & 37 deletions src/b2aiprep/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,6 @@
verify_speaker_from_files,
)

try:
import TTS
except ImportError:
TTS = None
else:
from .process import VoiceConversion


@click.group()
def main():
    # Root command group: it does no work itself; subcommands attach to it
    # through @main.command(). A comment is used instead of a docstring so
    # the text click prints for --help stays exactly as it was.
    pass
Expand Down Expand Up @@ -274,35 +266,6 @@ def verify(file1, file2, model, device):
score, prediction = verify_speaker_from_files(file1, file2, model=model, device=device)
print(f"Score: {float(score):.2f} Prediction: {bool(prediction)}")


if TTS is not None:

@main.command()
@click.argument("source_file", type=click.Path(exists=True))
@click.argument("target_voice_file", type=click.Path(exists=True))
@click.argument("output_file", type=click.Path())
@click.option(
    "--model_name",
    type=str,
    default="voice_conversion_models/multilingual/vctk/freevc24",
    show_default=True,
)
@click.option(
    "--device", type=str, default=None, show_default=True, help="Device to use for inference."
)
@click.option("--progress_bar", type=bool, default=True, show_default=True)
def convert_voice(
    source_file, target_voice_file, output_file, model_name, device, progress_bar
):
    """
    Converts the voice in the source_file to match the voice in the target_voice_file,
    and saves the output to output_file.
    """
    # NOTE: the docstring above doubles as the command's --help text, so its
    # wording is part of the CLI's observable behaviour.
    # Both input paths are already validated by click.Path(exists=True);
    # the output path is allowed not to exist yet.
    vc = VoiceConversion(model_name=model_name, progress_bar=progress_bar, device=device)
    vc.convert_voice(source_file, target_voice_file, output_file)
    print(f"Conversion complete. Output saved to: {output_file}")


@main.command()
@click.argument("audio_file", type=click.Path(exists=True))
@click.option("--model_id", type=str, default="openai/whisper-tiny", show_default=True)
Expand Down
55 changes: 0 additions & 55 deletions src/b2aiprep/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@
from speechbrain.inference.speaker import EncoderClassifier
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

try:
from TTS.api import TTS
except ImportError:
TTS = None

warnings.filterwarnings("ignore")


Expand Down Expand Up @@ -386,56 +381,6 @@ def to_features(

return features if return_features else None, outfile, outfig


if TTS is not None:

class VoiceConversion:
    """Wrapper around a ``TTS`` model that converts one voice to sound like another."""

    def __init__(
        self,
        model_name: str = "voice_conversion_models/multilingual/vctk/freevc24",
        progress_bar: bool = True,
        device: ty.Optional[str] = None,
    ) -> None:
        """
        Initialize the Voice Conversion model.

        :param model_name: Name of the model to be used for voice conversion.
        :param progress_bar: Passed through unchanged to ``TTS`` as ``progress_bar``.
        :param device: Device string. The GPU is requested only when the string
            contains "cuda"; ``None`` or any other value leaves the GPU off.
        :raises ValueError: If a CUDA device is requested but CUDA is not available.
        TODO: Add support for multiple devices.
        """
        # Only the presence of "cuda" is inspected; a device index such as
        # "cuda:1" is not forwarded (see the TODO above).
        use_gpu = device is not None and "cuda" in device
        if use_gpu and not torch.cuda.is_available():
            # Fail loudly rather than silently falling back to CPU.
            raise ValueError("CUDA is not available. Please use CPU.")

        self.tts = TTS(model_name=model_name, progress_bar=progress_bar, gpu=use_gpu)

    def convert_voice(self, source_file: str, target_file: str, output_file: str) -> None:
        """
        Converts the voice from the source audio file to match the voice in
        the target audio file.

        :param source_file: Path to the source audio file.
        :param target_file: Path to the target audio file.
        :param output_file: Path where the converted audio file will be saved.
        :raises FileNotFoundError: If the source or the target path does not exist.
        """
        # Check both inputs up front; the source is checked first, so it is the
        # one reported when both are missing.
        if not os.path.exists(source_file):
            raise FileNotFoundError(f"The source file {source_file} does not exist.")
        if not os.path.exists(target_file):
            raise FileNotFoundError(f"The target file {target_file} does not exist.")

        # Perform voice conversion without modifying the source or target audio data directly.
        with torch.no_grad():
            self.tts.voice_conversion_to_file(
                source_wav=source_file, target_wav=target_file, file_path=output_file
            )


class SpeechToText:
"""
A class for converting speech to text using a specified speech-to-text model.
Expand Down

0 comments on commit e3f0ceb

Please sign in to comment.