Skip to content

Commit

Permalink
Merge pull request #86 from njbbaer/audio-input
Browse files Browse the repository at this point in the history
Use transcription endpoint to convert voice messages to text
  • Loading branch information
njbbaer authored Jan 31, 2024
2 parents b4d28af + 2cf548b commit 91e88e9
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 5 deletions.
16 changes: 11 additions & 5 deletions src/telegram/telegram_bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,15 @@ def __init__(self, context_filepath, telegram_token, authorized_users):
self.app = ApplicationBuilder().token(telegram_token).build()
self.sim = Simulacrum(context_filepath)

# Ignore stale messages
self.app.add_handler(MessageHandler(StaleMessageFilter(), self.do_nothing))

# Disallow unauthorized users
self.app.add_handler(
MessageHandler(~filters.User(username=authorized_users), self.unauthorized)
)

# Handle commands
self.app.add_handler(
CommandHandler("new", self.new_conversation_command_handler)
)
Expand All @@ -32,12 +37,16 @@ def __init__(self, context_filepath, telegram_token, authorized_users):
self.app.add_handler(CommandHandler("clear", self.clear_command_handler))
self.app.add_handler(CommandHandler("help", self.help_command_handler))
self.app.add_handler(CommandHandler("start", self.do_nothing))

# Handle messages
self.app.add_handler(
MessageHandler(
filters.TEXT & ~filters.COMMAND | filters.PHOTO,
(filters.TEXT & ~filters.COMMAND) | filters.PHOTO | filters.VOICE,
self.chat_message_handler,
)
)

# Handle unknown messages and errors
self.app.add_handler(MessageHandler(filters.ALL, self.unknown_message_handler))
self.app.add_error_handler(self.error_handler)

Expand All @@ -47,7 +56,7 @@ def run(self):
@message_handler
async def chat_message_handler(self, ctx):
# Handle an incoming chat message: fetch its image URL and its text from the
# context, then pass both to self._chat.
image_url = await ctx.get_image_url()
# NOTE(review): this assignment is immediately overwritten by the next line.
# It appears to be the pre-change line of this diff hunk (text/caption read
# directly), superseded by ctx.get_text() below — confirm.
text = ctx.message.text or ctx.message.caption
text = await ctx.get_text()
await self._chat(ctx, text, image_url)

@message_handler
Expand Down Expand Up @@ -75,18 +84,15 @@ async def undo_command_handler(self, ctx):
@message_handler
async def stats_command_handler(self, ctx):
    """Send a cost report for the conversation and for the last message.

    The report always includes the conversation cost. Per-message cost and
    token counts are included only when ``self.sim.last_cost`` is truthy;
    otherwise the last-message section reads "Not available".
    """
    sim = self.sim
    report = [
        "*Conversation*",
        f"`Cost: ${sim.get_conversation_cost():.2f}`",
        "\n*Last Message*",
    ]
    if not sim.last_cost:
        report.append("`Not available`")
    else:
        report.extend(
            [
                f"`Cost: ${sim.last_cost:.2f}`",
                f"`Prompt tokens: {sim.last_prompt_tokens}`",
                f"`Completion tokens: {sim.last_completion_tokens}`",
            ]
        )
    await ctx.send_message("\n".join(report))

@message_handler
Expand Down
25 changes: 25 additions & 0 deletions src/telegram/telegram_context.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
import os
import uuid

from openai import AsyncOpenAI

from telegram.error import BadRequest


Expand All @@ -23,6 +28,11 @@ async def get_image_url(self):
file = await self.app.bot.get_file(photo_file.file_id)
return file.file_path

async def get_text(self):
    """Return the textual content of the current message.

    Voice messages are transcribed; any other message yields its text, or
    its caption when the text is empty or missing.
    """
    message = self.message
    if not message.voice:
        return message.text or message.caption
    return await self._transcribe_voice()

async def send_message(self, text):
# Attempt to fix broken markdown
if text.count("_") % 2 != 0 and text.endswith("_"):
Expand All @@ -35,3 +45,18 @@ async def send_message(self, text):

async def send_typing_action(self):
    """Send the "typing" chat action to this context's chat."""
    bot = self.app.bot
    await bot.send_chat_action(action="typing", chat_id=self.chat_id)

async def _transcribe_voice(self):
file_id = self.message.voice.file_id
voice_file = await self.context.bot.get_file(file_id)
os.makedirs("tmp", exist_ok=True)
voice_filepath = f"tmp/{uuid.uuid4()}.ogg"
try:
await voice_file.download_to_drive(voice_filepath)
with open(voice_filepath, "rb") as file:
transcript = await AsyncOpenAI().audio.transcriptions.create(
model="whisper-1", file=file
)
return transcript.text
finally:
os.remove(voice_filepath)

0 comments on commit 91e88e9

Please sign in to comment.