From 93da742dc2ba4d0eb64863c5972cc042365d9b42 Mon Sep 17 00:00:00 2001
From: Samuel Park
Date: Wed, 11 Feb 2026 15:33:01 -0600
Subject: [PATCH] Polish assistant flow and align audio helper

---
 main.py          | 234 ++++++++++++++++++++++++++++++++++++++++++++---
 requirements.txt |   1 +
 2 files changed, 225 insertions(+), 10 deletions(-)
 create mode 100644 requirements.txt

diff --git a/main.py b/main.py
index d96d29d..26b997c 100644
--- a/main.py
+++ b/main.py
@@ -1,16 +1,230 @@
-# This is a sample Python script.
+"""BMO Raspberry Pi assistant with OpenAI chat + speech-to-text."""
 
-# Press Shift+F10 to execute it or replace it with your code.
-# Press Double Shift to search everywhere for classes, files, tool windows, actions, and settings.
+from __future__ import annotations
 
+import argparse
+import os
+import shutil
+import subprocess
+import tempfile
+import threading
+from pathlib import Path
 
-def print_hi(name):
-    # Use a breakpoint in the code line below to debug your script.
-    print(f'Hi, {name}')  # Press Ctrl+F8 to toggle the breakpoint.
+from openai import OpenAI
+from openai.types.chat import (
+    ChatCompletionSystemMessageParam,
+    ChatCompletionUserMessageParam,
+)
 
+CHAT_MODEL = os.getenv("OPENAI_CHAT_MODEL", "gpt-4o-mini")
+TRANSCRIPTION_MODEL = os.getenv("OPENAI_STT_MODEL", "gpt-4o-mini-transcribe")
+RECORD_SECONDS = int(os.getenv("RECORD_SECONDS", "5"))
 
-# Press the green button in the gutter to run the script.
-if __name__ == '__main__':
-    print_hi('BMO')
 
-# See PyCharm help at https://www.jetbrains.com/help/pycharm/
+def record_audio_wav(output_path: Path, duration_seconds: int) -> None:
+    """Record a mono 16kHz wav file from the default ALSA input device."""
+    if shutil.which("arecord") is None:
+        raise RuntimeError(
+            "`arecord` not found. Install with: sudo apt install alsa-utils"
+        )
+
+    subprocess.run(
+        [
+            "arecord",
+            "-d",
+            str(duration_seconds),
+            "-f",
+            "S16_LE",
+            "-r",
+            "16000",
+            "-c",
+            "1",
+            str(output_path),
+        ],
+        check=True,
+    )
+
+
+class BMOAssistant:
+    """Wrap an OpenAI client with chat and microphone-transcription helpers."""
+
+    def __init__(self, client: OpenAI):
+        self.client = client
+
+    def ask_chatbot(self, prompt: str) -> str:
+        """Type-safe OpenAI chat call (fixes PyCharm type warning)."""
+        system_message: ChatCompletionSystemMessageParam = {
+            "role": "system",
+            "content": (
+                "You are BMO, a friendly Raspberry Pi assistant. "
+                "Keep answers concise and practical."
+            ),
+        }
+        user_message: ChatCompletionUserMessageParam = {
+            "role": "user",
+            "content": prompt,
+        }
+
+        response = self.client.chat.completions.create(
+            model=CHAT_MODEL,
+            messages=[system_message, user_message],
+        )
+
+        content = response.choices[0].message.content or ""
+        return content.strip()
+
+    def transcribe_microphone(self, duration_seconds: int = RECORD_SECONDS) -> str:
+        """Record `duration_seconds` of audio and return its transcript.
+
+        The recording goes to a temporary wav file that is removed afterwards,
+        whether or not recording or transcription succeeds.
+        """
+        # Only reserve a path here; the file is closed again so arecord can write it.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_wav:
+            temp_path = Path(temp_wav.name)
+
+        try:
+            record_audio_wav(temp_path, duration_seconds)
+            with temp_path.open("rb") as audio_file:
+                transcript = self.client.audio.transcriptions.create(
+                    model=TRANSCRIPTION_MODEL,
+                    file=audio_file,
+                )
+            return transcript.text.strip()
+        finally:
+            temp_path.unlink(missing_ok=True)
+
+
+def require_openai_client() -> OpenAI:
+    """Create an OpenAI client from the OPENAI_API_KEY environment variable.
+
+    Raises RuntimeError when the variable is unset or empty.
+    """
+    api_key = os.getenv("OPENAI_API_KEY")
+    if not api_key:
+        raise RuntimeError("Set OPENAI_API_KEY first.")
+    return OpenAI(api_key=api_key)
+
+
+def run_cli(assistant: BMOAssistant) -> None:
+    """Run the terminal interface: a text chat loop or one spoken question."""
+    print("=== BMO OpenAI Assistant (CLI) ===")
+    print("1) Text chatbot")
+    print("2) Speech-to-text + chatbot")
+
+    choice = input("Select mode (1/2): ").strip()
+    if choice == "1":
+        while True:
+            user_input = input("You: ").strip()
+            if user_input.lower() in {"quit", "exit", "q"}:
+                return
+            if not user_input:
+                continue
+            print(f"BMO: {assistant.ask_chatbot(user_input)}\n")
+    elif choice == "2":
+        print(f"Recording for {RECORD_SECONDS} seconds...")
+        spoken_text = assistant.transcribe_microphone()
+        print(f"You (speech): {spoken_text}")
+        print(f"BMO: {assistant.ask_chatbot(spoken_text)}")
+    else:
+        print("Invalid choice. Please run again and pick 1 or 2.")
+
+
+def run_gui(assistant: BMOAssistant) -> None:
+    """Run the Tkinter chat window; API calls happen on background threads."""
+    import tkinter as tk
+    from tkinter import scrolledtext
+
+    root = tk.Tk()
+    root.title("BMO Assistant")
+    root.geometry("900x520")
+
+    output = scrolledtext.ScrolledText(root, wrap=tk.WORD, font=("Arial", 14))
+    output.pack(fill=tk.BOTH, expand=True, padx=10, pady=10)
+
+    input_frame = tk.Frame(root)
+    input_frame.pack(fill=tk.X, padx=10, pady=(0, 10))
+
+    user_entry = tk.Entry(input_frame, font=("Arial", 14))
+    user_entry.pack(side=tk.LEFT, fill=tk.X, expand=True)
+
+    def append_line(text: str) -> None:
+        output.insert(tk.END, text + "\n")
+        output.see(tk.END)
+
+    def set_enabled(enabled: bool) -> None:
+        state = tk.NORMAL if enabled else tk.DISABLED
+        user_entry.configure(state=state)
+        send_button.configure(state=state)
+        speak_button.configure(state=state)
+
+    def send_text() -> None:
+        text = user_entry.get().strip()
+        if not text:
+            return
+        append_line(f"You: {text}")
+        user_entry.delete(0, tk.END)
+        set_enabled(False)
+
+        def worker() -> None:
+            try:
+                reply = assistant.ask_chatbot(text)
+                root.after(0, lambda: append_line(f"BMO: {reply}\n"))
+            except Exception as exc:
+                # Format the message now: `exc` is unbound once this handler exits,
+                # so a lambda that reads it later would fail with NameError.
+                root.after(0, append_line, f"Error: {exc}\n")
+            finally:
+                root.after(0, lambda: set_enabled(True))
+
+        threading.Thread(target=worker, daemon=True).start()
+
+    def speak() -> None:
+        set_enabled(False)
+        append_line("BMO: Recording...")
+
+        def worker() -> None:
+            try:
+                spoken = assistant.transcribe_microphone()
+                root.after(0, lambda: append_line(f"You (speech): {spoken}"))
+                reply = assistant.ask_chatbot(spoken)
+                root.after(0, lambda: append_line(f"BMO: {reply}\n"))
+            except Exception as exc:
+                # Format the message now: `exc` is unbound once this handler exits,
+                # so a lambda that reads it later would fail with NameError.
+                root.after(0, append_line, f"Error: {exc}\n")
+            finally:
+                root.after(0, lambda: set_enabled(True))
+
+        threading.Thread(target=worker, daemon=True).start()
+
+    send_button = tk.Button(input_frame, text="Send", command=send_text)
+    send_button.pack(side=tk.LEFT, padx=(8, 0))
+
+    speak_button = tk.Button(input_frame, text="Speak", command=speak)
+    speak_button.pack(side=tk.LEFT, padx=(8, 0))
+
+    user_entry.bind("<Return>", lambda _event: send_text())
+    append_line("BMO: Ready")
+    root.mainloop()
+
+
+def parse_args() -> argparse.Namespace:
+    """Parse command-line options (--mode cli|gui, default cli)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--mode", choices=["cli", "gui"], default="cli")
+    return parser.parse_args()
+
+
+def main() -> None:
+    """Build the assistant and start the interface selected by --mode."""
+    args = parse_args()
+    assistant = BMOAssistant(require_openai_client())
+    if args.mode == "gui":
+        run_gui(assistant)
+    else:
+        run_cli(assistant)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..3326ddf
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+openai>=1.40.0