From 5593cf867f1477e3ca13efd85b0b7666f488bf34 Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Mon, 29 Apr 2024 05:48:09 +0100 Subject: [PATCH] feat/b64_tts (#58) * feat/b64_tts A hivemind satellite can listen for `speak` messages from master, emit 'speak:b64_audio', and listen for 'speak:b64_audio.response' with b64 encoded audio to play. eg, in a browser counterpart to https://github.com/OpenVoiceOS/ovos-dinkum-listener/pull/75 needs https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/195 * Update ovos_audio/service.py Co-authored-by: Mike * Update requirements.txt --------- Co-authored-by: Mike --- ovos_audio/service.py | 30 ++++++++++++++++++++++++++++-- requirements/requirements.txt | 3 +-- 2 files changed, 29 insertions(+), 4 deletions(-) diff --git a/ovos_audio/service.py b/ovos_audio/service.py index cafd1e8..46f4c9d 100644 --- a/ovos_audio/service.py +++ b/ovos_audio/service.py @@ -1,4 +1,6 @@ +import base64 import binascii +import json import os import os.path import time @@ -253,6 +255,29 @@ def run(self): else: self.status.set_error('No TTS loaded') + def handle_b64_audio(self, message): + """synthesizes speech, but instead of queuing for playback + returns it b64 encoded in the bus + allows 3rd party integrations to use OVOS as a TTS service + """ + sess = SessionManager.get(message) + stopwatch = Stopwatch() + stopwatch.start() + utterance = message.data['utterance'] + + ctxt = self.tts._get_ctxt({"message": message}) + wav, _ = self.tts.synth(utterance, ctxt) + with open(wav, "rb") as f: + audio = f.read() + + b64_audio = base64.b64encode(audio) + self.bus.emit(message.response({"audio": b64_audio})) + + stopwatch.stop() + report_timing(sess.session_id, stopwatch, + {'utterance': utterance, + 'tts': self.tts.plugin_id}) + def handle_speak(self, message): """Handle "speak" message @@ -294,7 +319,7 @@ def handle_speak(self, message): stopwatch.stop() report_timing(sess.session_id, stopwatch, {'utterance': utterance, - 'tts': self.tts.__class__.__name__}) + 'tts': self.tts.plugin_id}) def _maybe_reload_tts(self): """ @@ -317,7 +342,7 @@ def _maybe_reload_tts(self): # if fallback TTS is the same as main TTS dont load it if config.get("module", "") == config.get("fallback_module", "") or not config.get("fallback_module", ""): LOG.debug("Skipping fallback TTS init, fallback is empty or same as main TTS") - return + return if not config.get('preload_fallback', True): LOG.debug("Skipping fallback TTS init") @@ -547,6 +572,7 @@ def init_messagebus(self): self.bus.on('mycroft.audio.queue', self.handle_queue_audio) self.bus.on('mycroft.audio.play_sound', self.handle_instant_play) self.bus.on('speak', self.handle_speak) + self.bus.on('speak:b64_audio', self.handle_b64_audio) self.bus.on('ovos.languages.tts', self.handle_get_languages_tts) self.bus.on("opm.tts.query", self.handle_opm_tts_query) self.bus.on("opm.audio.query", self.handle_opm_audio_query) diff --git a/requirements/requirements.txt b/requirements/requirements.txt index a4cf68f..56c8963 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -1,5 +1,4 @@ ovos-utils~=0.0, >=0.0.38 ovos-bus-client~=0.0, >=0.0.8 ovos-config~=0.0,>=0.0.13a7 -ovos-plugin-manager~=0.0, >=0.0.25 - +ovos-plugin-manager~=0.0, >=0.0.26a16