diff --git a/.gitignore b/.gitignore index 9595be5..c8ece8c 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,4 @@ dist # Created by unit tests .pytest_cache/ +/.gtm/ diff --git a/ovos_audio/playback.py b/ovos_audio/playback.py new file mode 100644 index 0000000..927ce05 --- /dev/null +++ b/ovos_audio/playback.py @@ -0,0 +1,272 @@ +import random +import threading +from ovos_audio.transformers import TTSTransformersService +from ovos_bus_client.message import Message +from ovos_plugin_manager.templates.tts import TTS +from ovos_utils.log import LOG +from ovos_utils.sound import play_audio +from queue import Empty +from threading import Thread +from time import time, sleep + + +class PlaybackThread(Thread): + """Thread class for playing back tts audio and sending + viseme data to enclosure. + """ + + def __init__(self, queue=TTS.queue, bus=None): + super(PlaybackThread, self).__init__() + self.queue = queue or TTS.queue + self._terminated = False + self._processing_queue = False + self._paused = False + self.enclosure = None + self.p = None + self._tts = [] + self.bus = bus or None + self._now_playing = None + self.active_tts = None + self._started = threading.Event() + self.tts_transform = TTSTransformersService(self.bus) + + @property + def is_running(self): + return self._started.is_set() and not self._terminated + + def activate_tts(self, tts_id): + self.active_tts = tts_id + tts = self.get_attached_tts() + if tts: + tts.begin_audio() + + def deactivate_tts(self): + if self.active_tts: + tts = self.get_attached_tts() + if tts: + tts.end_audio() + self.active_tts = None + + def init(self, tts): + """DEPRECATED! Init the TTS Playback thread.""" + self.attach_tts(tts) + self.set_bus(tts.bus) + + def set_bus(self, bus): + """Provide bus instance to the TTS Playback thread. + Args: + bus (MycroftBusClient): bus client + """ + self.bus = bus + self.tts_transform.set_bus(bus) + + @property + def tts(self): + tts = self.get_attached_tts() + if not tts and self._tts: + return self._tts[0] + return tts + + @tts.setter + def tts(self, val): + self.attach_tts(val) + + @property + def attached_tts(self): + return self._tts + + def attach_tts(self, tts): + """Add TTS to be cache checked.""" + if tts not in self.attached_tts: + self.attached_tts.append(tts) + + def detach_tts(self, tts): + """Remove TTS from cache check.""" + if tts in self.attached_tts: + self.attached_tts.remove(tts) + + def get_attached_tts(self, tts_id=None): + tts_id = tts_id or self.active_tts + if not tts_id: + return + for tts in self.attached_tts: + if hasattr(tts, "tts_id"): + # opm plugin + if tts.tts_id == tts_id: + return tts + + for tts in self.attached_tts: + if not hasattr(tts, "tts_id"): + # non-opm plugin + if tts.tts_name == tts_id: + return tts + + def clear_queue(self): + """Remove all pending playbacks.""" + while not self.queue.empty(): + self.queue.get() + try: + self.p.terminate() + except Exception: + pass + + def begin_audio(self, message=None): + """Perform beginning of speech actions.""" + if self.bus: + message = message or Message("speak") + self.bus.emit(message.forward("recognizer_loop:audio_output_start")) + else: + LOG.warning("Speech started before bus was attached.") + + def end_audio(self, listen, message=None): + """Perform end of speech output actions. + Will inform the system that speech has ended and trigger the TTS's + cache checks. Listening will be triggered if requested. + Args: + listen (bool): True if listening event should be emitted + """ + if self.bus: + # Send end of speech signals to the system + message = message or Message("speak") + self.bus.emit(message.forward("recognizer_loop:audio_output_end")) + if listen: + self.bus.emit(message.forward('mycroft.mic.listen')) + else: + LOG.warning("Speech started before bus was attached.") + + def on_start(self, message=None): + self.blink(0.5) + if not self._processing_queue: + self._processing_queue = True + self.begin_audio(message) + + def on_end(self, listen=False, message=None): + if self._processing_queue: + self.end_audio(listen, message) + self._processing_queue = False + # Clear cache for all attached tts objects + # This is basically the only safe time + for tts in self.attached_tts: + tts.cache.curate() + self.blink(0.2) + + def _play(self): + try: + data, visemes, listen, tts_id, message = self._now_playing + self.activate_tts(tts_id) + self.on_start(message) + + data = self.tts_transform.transform(data, message.context) + + self.p = play_audio(data) + if visemes: + self.show_visemes(visemes) + if self.p: + self.p.communicate() + self.p.wait() + self.deactivate_tts() + if self.queue.empty(): + self.on_end(listen, message) + except Empty: + pass + except Exception as e: + LOG.exception(e) + if self._processing_queue: + self.on_end() + self._now_playing = None + + def run(self, cb=None): + """Thread main loop. Get audio and extra data from queue and play. + + The queue messages is a tuple containing + snd_type: 'mp3' or 'wav' telling the loop what format the data is in + data: path to temporary audio data + videmes: list of visemes to display while playing + listen: if listening should be triggered at the end of the sentence. + + Playback of audio is started and the visemes are sent over the bus + the loop then wait for the playback process to finish before starting + checking the next position in queue. + + If the queue is empty the tts.end_audio() is called possibly triggering + listening. + """ + self._paused = False + self._started.set() + while not self._terminated: + while self._paused: + sleep(0.2) + try: + # HACK: we do these check to account for direct usages of TTS.queue singletons + speech_data = self.queue.get(timeout=2) + if len(speech_data) == 5 and isinstance(speech_data[-1], Message): + data, visemes, listen, tts_id, message = speech_data + else: + LOG.warning("it seems you are interfacing with TTS.queue directly, this is not recommended!\n" + "new expected TTS.queue contents -> data, visemes, listen, tts_id, message") + if len(speech_data) == 6: + # old ovos backwards compat + _, data, visemes, ident, listen, tts_id = speech_data + elif len(speech_data) == 5: + # mycroft style + tts_id = None + _, data, visemes, ident, listen = speech_data + else: + # old mycroft style TODO can this be deprecated? its very very old + listen = False + tts_id = None + _, data, visemes, ident = speech_data + + message = Message("speak", context={"session": {"session_id": ident}}) + + self._now_playing = (data, visemes, listen, tts_id, message) + self._play() + except Exception as e: + pass + + def show_visemes(self, pairs): + """Send viseme data to enclosure + + Args: + pairs (list): Visime and timing pair + + Returns: + bool: True if button has been pressed. + """ + if self.enclosure: + self.enclosure.mouth_viseme(time(), pairs) + + def pause(self): + """pause thread""" + self._paused = True + if self.p: + self.p.terminate() + + def resume(self): + """resume thread""" + if self._now_playing: + self._play() + self._paused = False + + def clear(self): + """Clear all pending actions for the TTS playback thread.""" + self.clear_queue() + + def blink(self, rate=1.0): + """Blink mycroft's eyes""" + if self.enclosure and random.random() < rate: + self.enclosure.eyes_blink("b") + + def stop(self): + """Stop thread""" + self._now_playing = None + self._terminated = True + self.clear_queue() + + def shutdown(self): + self.stop() + for tts in self.attached_tts: + self.detach_tts(tts) + + def __del__(self): + self.shutdown() diff --git a/ovos_audio/service.py b/ovos_audio/service.py index eedb036..2fc6a89 100644 --- a/ovos_audio/service.py +++ b/ovos_audio/service.py @@ -2,8 +2,11 @@ import os.path import time from os.path import exists -from threading import Thread, Lock - +from ovos_audio.audio import AudioService +from ovos_audio.playback import PlaybackThread +from ovos_audio.transformers import DialogTransformersService +from ovos_audio.tts import TTSFactory +from ovos_audio.utils import report_timing, validate_message_context from ovos_bus_client import Message, MessageBusClient from ovos_bus_client.session import SessionManager from ovos_config.config import Configuration @@ -16,10 +19,7 @@ from ovos_utils.metrics import Stopwatch from ovos_utils.process_utils import ProcessStatus, StatusCallbackMap from ovos_utils.sound import play_audio - -from ovos_audio.audio import AudioService -from ovos_audio.tts import TTSFactory -from ovos_audio.utils import report_timing, validate_message_context +from threading import Thread, Lock def on_ready(): @@ -74,6 +74,8 @@ def __init__(self, ready_hook=on_ready, error_hook=on_error, self.bus = bus self.status.bind(self.bus) self.init_messagebus() + self.dialog_transform = DialogTransformersService(self.bus) + self.playback_thread = PlaybackThread(TTS.queue, self.bus) try: self._maybe_reload_tts() @@ -270,6 +272,16 @@ def handle_speak(self, message): stopwatch.start() utterance = message.data['utterance'] + + # allow dialog transformers to rewrite speech + utt2, message.context = self.dialog_transform.transform(dialog=utterance, + context=message.context, + sess=sess) + if utterance != utt2: + LOG.debug(f"original dialog: {utterance}") + LOG.info(f"dialog transformed to: {utt2}") + utterance = utt2 + listen = message.data.get('expect_response', False) self.execute_tts(utterance, sess.session_id, listen, message) @@ -293,7 +305,7 @@ def _maybe_reload_tts(self): # Create new tts instance LOG.info("(re)loading TTS engine") self.tts = TTSFactory.create(config) - self.tts.init(self.bus) + self.tts.init(self.bus, self.playback_thread) self._tts_hash = config.get("module", "") # if fallback TTS is the same as main TTS dont load it @@ -341,7 +353,7 @@ def _get_tts_fallback(self): engine: config.get('tts', {}).get(engine, {})}} self.fallback_tts = TTSFactory.create(cfg) self.fallback_tts.validator.validate() - self.fallback_tts.init(self.bus) + self.fallback_tts.init(self.bus, self.playback_thread) return self.fallback_tts diff --git a/ovos_audio/transformers.py b/ovos_audio/transformers.py new file mode 100644 index 0000000..b6a1ff3 --- /dev/null +++ b/ovos_audio/transformers.py @@ -0,0 +1,159 @@ +from ovos_bus_client.session import Session, SessionManager +from ovos_config import Configuration +from ovos_plugin_manager.dialog_transformers import find_dialog_transformer_plugins, find_tts_transformer_plugins +from ovos_utils.log import LOG +from typing import Tuple + + +class DialogTransformersService: + """ transform dialogs before being sent to TTS """ + + def __init__(self, bus, config=None): + self.loaded_plugins = {} + self.has_loaded = False + self.bus = bus + # to activate a plugin, just add an entry to mycroft.conf for it + self.config = config or Configuration().get("dialog_transformers", {}) + self.load_plugins() + + def load_plugins(self): + for plug_name, plug in find_dialog_transformer_plugins().items(): + if plug_name in self.config: + # if disabled skip it + if not self.config[plug_name].get("active", True): + continue + try: + self.loaded_plugins[plug_name] = plug() + self.loaded_plugins[plug_name].bind(self.bus) + LOG.info(f"loaded audio transformer plugin: {plug_name}") + except Exception as e: + LOG.exception(f"Failed to load dialog transformer plugin: " + f"{plug_name}") + self.has_loaded = True + + @property + def plugins(self) -> list: + """ + Return loaded transformers in priority order, such that modules with a + higher `priority` rank are called first and changes from lower ranked + transformers are applied last. + + A plugin of `priority` 1 will override any existing context keys and + will be the last to modify `audio_data` + """ + return sorted(self.loaded_plugins.values(), + key=lambda k: k.priority, reverse=True) + + def shutdown(self): + """ + Shutdown all loaded plugins + """ + for module in self.plugins: + try: + module.shutdown() + except Exception as e: + LOG.warning(e) + + def transform(self, dialog: str, context: dict = None, sess: Session = None) -> Tuple[str, dict]: + """ + Get transformed audio and context for the preceding audio + @param dialog: str to be spoken + @return: transformed dialog to be sent to TTS + """ + + # TODO property not yet introduced in Session + sess = sess or SessionManager.get() + # if isinstance(sess, dict): + # sess = Session.deserialize(sess) + # active_transformers = sess.dialog_transformers or self.plugins + + active_transformers = self.plugins + + for module in active_transformers: + try: + LOG.debug(f"checking dialog transformer: {module}") + dialog, context = module.transform(dialog, context=kwargs) + LOG.debug(f"{module.name}: {dialog}") + except Exception as e: + LOG.exception(e) + return dialog, context + + +class TTSTransformersService: + """ transform wav_files after TTS """ + + def __init__(self, bus=None, config=None): + self.loaded_plugins = {} + self.has_loaded = False + self.bus = bus + # to activate a plugin, just add an entry to mycroft.conf for it + self.config = config or Configuration().get("tts_transformers", {}) + self.load_plugins() + + def load_plugins(self): + for plug_name, plug in find_tts_transformer_plugins().items(): + if plug_name in self.config: + # if disabled skip it + if not self.config[plug_name].get("active", True): + continue + try: + self.loaded_plugins[plug_name] = plug() + if self.bus: + self.loaded_plugins[plug_name].bind(self.bus) + LOG.info(f"loaded audio transformer plugin: {plug_name}") + except Exception as e: + LOG.exception(f"Failed to load tts transformer plugin: " + f"{plug_name}") + self.has_loaded = True + + def set_bus(self, bus): + self.bus = bus + for p in self.loaded_plugins.values(): + p.bind(self.bus) + + @property + def plugins(self) -> list: + """ + Return loaded transformers in priority order, such that modules with a + higher `priority` rank are called first and changes from lower ranked + transformers are applied last. + + A plugin of `priority` 1 will override any existing context keys and + will be the last to modify `audio_data` + """ + return sorted(self.loaded_plugins.values(), + key=lambda k: k.priority, reverse=True) + + def shutdown(self): + """ + Shutdown all loaded plugins + """ + for module in self.plugins: + try: + module.shutdown() + except Exception as e: + LOG.warning(e) + + def transform(self, wav_file: str, context: dict = None, sess: Session = None) -> Tuple[str, dict]: + """ + Get transformed audio and context for the preceding audio + @param wav_file: str path for the TTS wav file + @return: path to transformed wav file + """ + + # TODO property not yet introduced in Session + sess = sess or SessionManager.get() + # if isinstance(sess, dict): + # sess = Session.deserialize(sess) + # active_transformers = sess.tts_transformers or self.plugins + + active_transformers = self.plugins + + for module in active_transformers: + try: + LOG.debug(f"checking tts transformer: {module}") + wav_file, context = module.transform(wav_file, context=context or {}) + LOG.debug(f"{module.name}: {wav_file}") + except Exception as e: + LOG.exception(e) + return wav_file, context diff --git a/requirements.txt b/requirements.txt index 429581e..2785207 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ ovos-utils~=0.0, >=0.0.34 ovos-bus-client~=0.0, >=0.0.5a1 ovos-config~=0.0,>=0.0.10 -ovos-plugin-manager~=0.0, >=0.0.24a1 +ovos-plugin-manager~=0.0, >=0.0.24a10 ovos_plugin_common_play~=0.0, >=0.0.6a2