move into helper method for readability

OpenVoiceOS · Jun 2, 2024 · 931743e · 931743e
1 parent d03b68c
commit 931743e
Showing 1 changed file with 24 additions and 18 deletions.
diff --git a/ovos_dinkum_listener/voice_loop/voice_loop.py b/ovos_dinkum_listener/voice_loop/voice_loop.py
@@ -683,6 +683,29 @@ def _get_tx(self, stt_context: dict) -> (str, dict):
         stt_context["transcription"] = text
         return text, stt_context
 
+    def _vad_remove_silence(self):
+        """Remove silence from the STT buffer using the VAD plugin.
+        Skips recordings <= 1 s; only swaps in trimmed audio that is >= 1 s.
+        """
+        # NOTE: This is using the FS-STT buffer directly, not the S-STT queue
+        n_chunks = len(self.stt_audio_bytes) / self.mic.chunk_size
+        seconds = n_chunks * self.mic.seconds_per_chunk
+        LOG.debug(f"recorded {seconds} seconds of audio")
+        if seconds > 1:
+            extracted_speech = self.vad.extract_speech(self.stt_audio_bytes)
+            n_chunks = len(extracted_speech) / self.mic.chunk_size
+            seconds2 = n_chunks * self.mic.seconds_per_chunk
+            LOG.debug(f"removed {seconds - seconds2} seconds of silence, "
+                      f"trimmed audio has {seconds2} seconds")
+            if extracted_speech and seconds2 >= 1:
+                self.stt.stream.buffer.clear()
+                # replace the stt buffer with cropped audio
+                self.stt.stream.update(extracted_speech)
+            else:
+                LOG.debug("trimmed audio is too short! skipping VAD silence removal")
+        else:
+            LOG.debug(f"skipping silence removal")
+
     def _after_cmd(self, chunk: bytes):
         """
         Handle audio chunk after VAD has determined a command is ended.
@@ -694,24 +717,7 @@ def _after_cmd(self, chunk: bytes):
         # Command has ended, call transformers pipeline before STT
         chunk, stt_context = self.transformers.transform(chunk)
         if isinstance(self.stt, FakeStreamingSTT) and self.remove_silence:
-            # NOTE: This is using the FS-STT buffer directly, not the S-STT queue
-            n_chunks = len(self.stt_audio_bytes) / self.mic.chunk_size
-            seconds = n_chunks * self.mic.seconds_per_chunk
-            LOG.debug(f"recorded {seconds} seconds of audio")
-            if seconds > 1:
-                extracted_speech = self.vad.extract_speech(self.stt_audio_bytes)
-                n_chunks = len(extracted_speech) / self.mic.chunk_size
-                seconds2 = n_chunks * self.mic.seconds_per_chunk
-                LOG.debug(f"removed {seconds - seconds2} seconds of silence, "
-                          f"trimmed audio has {seconds2} seconds")
-                if extracted_speech and seconds2 >= 1:
-                    self.stt.stream.buffer.clear()
-                    # replace the stt buffer with cropped audio
-                    self.stt.stream.update(extracted_speech)
-                else:
-                    LOG.debug("trimmed audio is too short! skipping VAD silence removal")
-            else:
-                LOG.debug(f"skipping silence removal")
+            self._vad_remove_silence()
 
         text, stt_context = self._get_tx(stt_context)