Skip to content

Commit

Permalink
move into helper method for readability
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Jun 2, 2024
1 parent d03b68c commit 931743e
Showing 1 changed file with 24 additions and 18 deletions.
42 changes: 24 additions & 18 deletions ovos_dinkum_listener/voice_loop/voice_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -683,6 +683,29 @@ def _get_tx(self, stt_context: dict) -> (str, dict):
stt_context["transcription"] = text
return text, stt_context

def _vad_remove_silence(self):
    """Swap the buffered STT audio for a silence-trimmed copy from the VAD plugin.

    Recordings of one second or less are left untouched. The trimmed audio
    only replaces the STT stream buffer when it is non-empty and lasts at
    least one second, so the audio handed to STT is never cropped below
    one second.
    """

    def duration_of(audio):
        # number of mic chunks in the audio times the seconds each chunk covers
        return len(audio) / self.mic.chunk_size * self.mic.seconds_per_chunk

    # NOTE: this reads the FS-STT buffer directly, not the S-STT queue
    recorded = duration_of(self.stt_audio_bytes)
    LOG.debug(f"recorded {recorded} seconds of audio")

    # guard: nothing worth trimming in very short recordings
    if recorded <= 1:
        LOG.debug("skipping silence removal")
        return

    speech = self.vad.extract_speech(self.stt_audio_bytes)
    trimmed = duration_of(speech)
    LOG.debug(f"removed {recorded - trimmed} seconds of silence, "
              f"trimmed audio has {trimmed} seconds")

    # guard: keep the untrimmed buffer when the VAD left too little audio
    if not speech or trimmed < 1:
        LOG.debug("trimmed audio is too short! skipping VAD silence removal")
        return

    # replace the stt buffer with cropped audio
    stream = self.stt.stream
    stream.buffer.clear()
    stream.update(speech)

def _after_cmd(self, chunk: bytes):
"""
Handle audio chunk after VAD has determined a command is ended.
Expand All @@ -694,24 +717,7 @@ def _after_cmd(self, chunk: bytes):
# Command has ended, call transformers pipeline before STT
chunk, stt_context = self.transformers.transform(chunk)
if isinstance(self.stt, FakeStreamingSTT) and self.remove_silence:
# NOTE: This is using the FS-STT buffer directly, not the S-STT queue
n_chunks = len(self.stt_audio_bytes) / self.mic.chunk_size
seconds = n_chunks * self.mic.seconds_per_chunk
LOG.debug(f"recorded {seconds} seconds of audio")
if seconds > 1:
extracted_speech = self.vad.extract_speech(self.stt_audio_bytes)
n_chunks = len(extracted_speech) / self.mic.chunk_size
seconds2 = n_chunks * self.mic.seconds_per_chunk
LOG.debug(f"removed {seconds - seconds2} seconds of silence, "
f"trimmed audio has {seconds2} seconds")
if extracted_speech and seconds2 >= 1:
self.stt.stream.buffer.clear()
# replace the stt buffer with cropped audio
self.stt.stream.update(extracted_speech)
else:
LOG.debug("trimmed audio is too short! skipping VAD silence removal")
else:
LOG.debug(f"skipping silence removal")
self._vad_remove_silence()

text, stt_context = self._get_tx(stt_context)

Expand Down

0 comments on commit 931743e

Please sign in to comment.