From 5593cf867f1477e3ca13efd85b0b7666f488bf34 Mon Sep 17 00:00:00 2001
From: JarbasAI <33701864+JarbasAl@users.noreply.github.com>
Date: Mon, 29 Apr 2024 05:48:09 +0100
Subject: [PATCH] feat/b64_tts (#58)

* feat/b64_tts

A HiveMind satellite can listen for `speak` messages from master, emit 'speak:b64_audio', and listen for 'speak:b64_audio.response' with b64 encoded audio to play, e.g. in a browser.

counterpart to https://github.com/OpenVoiceOS/ovos-dinkum-listener/pull/75

needs https://github.com/OpenVoiceOS/ovos-plugin-manager/pull/195

* Update ovos_audio/service.py

Co-authored-by: Mike <mike@graywind.org>

* Update requirements.txt

---------

Co-authored-by: Mike <mike@graywind.org>
---
 ovos_audio/service.py         | 30 ++++++++++++++++++++++++++++--
 requirements/requirements.txt |  3 +--
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/ovos_audio/service.py b/ovos_audio/service.py
index cafd1e8..46f4c9d 100644
--- a/ovos_audio/service.py
+++ b/ovos_audio/service.py
@@ -1,4 +1,6 @@
+import base64
 import binascii
+import json
 import os
 import os.path
 import time
@@ -253,6 +255,29 @@ def run(self):
         else:
             self.status.set_error('No TTS loaded')
 
+    def handle_b64_audio(self, message):
+        """Synthesize message.data['utterance'] and reply with the audio
+        base64 encoded under the "audio" key of the bus response, instead of
+        queuing it for playback. Allows 3rd party integrations to use OVOS
+        as a TTS service."""
+        sess = SessionManager.get(message)
+        stopwatch = Stopwatch()
+        stopwatch.start()
+        utterance = message.data['utterance']
+
+        ctxt = self.tts._get_ctxt({"message": message})
+        wav, _ = self.tts.synth(utterance, ctxt)
+        with open(wav, "rb") as f:
+            audio = f.read()
+        # b64encode returns bytes; decode so the payload is a JSON-safe str
+        b64_audio = base64.b64encode(audio).decode("utf-8")
+        self.bus.emit(message.response({"audio": b64_audio}))
+
+        stopwatch.stop()
+        report_timing(sess.session_id, stopwatch,
+                      {'utterance': utterance,
+                       'tts': self.tts.plugin_id})
+
     def handle_speak(self, message):
         """Handle "speak" message
 
@@ -294,7 +319,7 @@ def handle_speak(self, message):
         stopwatch.stop()
         report_timing(sess.session_id, stopwatch,
                       {'utterance': utterance,
-                       'tts': self.tts.__class__.__name__})
+                       'tts': self.tts.plugin_id})
 
     def _maybe_reload_tts(self):
         """
@@ -317,7 +342,7 @@ def _maybe_reload_tts(self):
         # if fallback TTS is the same as main TTS dont load it
         if config.get("module", "") == config.get("fallback_module", "") or not config.get("fallback_module", ""):
             LOG.debug("Skipping fallback TTS init, fallback is empty or same as main TTS")
-            return            
+            return
 
         if not config.get('preload_fallback', True):
             LOG.debug("Skipping fallback TTS init")
@@ -547,6 +572,7 @@ def init_messagebus(self):
         self.bus.on('mycroft.audio.queue', self.handle_queue_audio)
         self.bus.on('mycroft.audio.play_sound', self.handle_instant_play)
         self.bus.on('speak', self.handle_speak)
+        self.bus.on('speak:b64_audio', self.handle_b64_audio)
         self.bus.on('ovos.languages.tts', self.handle_get_languages_tts)
         self.bus.on("opm.tts.query", self.handle_opm_tts_query)
         self.bus.on("opm.audio.query", self.handle_opm_audio_query)
diff --git a/requirements/requirements.txt b/requirements/requirements.txt
index a4cf68f..56c8963 100644
--- a/requirements/requirements.txt
+++ b/requirements/requirements.txt
@@ -1,5 +1,4 @@
 ovos-utils~=0.0, >=0.0.38
 ovos-bus-client~=0.0, >=0.0.8
 ovos-config~=0.0,>=0.0.13a7
-ovos-plugin-manager~=0.0, >=0.0.25
-
+ovos-plugin-manager~=0.0, >=0.0.26a16