edgarjs · claudio-pi · Feb 12, 2026 · Feb 12, 2026 · gemini-code-assist · Feb 12, 2026
diff --git a/CLAUDE.md b/CLAUDE.md
@@ -12,10 +12,11 @@ Claudio is a messaging-to-Claude Code bridge. It supports both Telegram and What
 - `lib/cli.py` — CLI dispatch logic. Uses `sys.argv` (not argparse) with lazy imports per command for fast startup.
 - `lib/config.py` — `ClaudioConfig` class for global config (`~/.claudio/service.env`), `BotConfig` class for per-bot config (`~/.claudio/bots/<bot_id>/bot.env`). Functions: `parse_env_file()`, `save_bot_env()`, `save_model()`. Auto-migrates single-bot to multi-bot layout.
 - `lib/server.py` — Python HTTP server (stdlib `http.server`), listens on port 8421, routes POST `/telegram/webhook` and POST/GET `/whatsapp/webhook`. Multi-bot dispatch: matches Telegram webhooks via secret-token header, WhatsApp webhooks via HMAC-SHA256 signature verification. Supports dual-platform bots (same bot_id serving both Telegram and WhatsApp). Loads bot registry from `~/.claudio/bots/*/bot.env`. SIGHUP handler for hot-reload. Composite queue keys (`bot_id:chat_id` for Telegram, `bot_id:phone_number` for WhatsApp) for per-bot, per-user message isolation. `/reload` endpoint (requires `MANAGEMENT_SECRET` authentication). Webhook processing delegates to `lib/handlers.py`.
-- `lib/handlers.py` — Webhook orchestrator: parses webhooks, runs unified message pipeline (media download, voice transcription, Claude invocation, response delivery). Entry point: `process_webhook()`.
+- `lib/handlers.py` — Webhook orchestrator: parses webhooks, runs unified message pipeline (media download, voice transcription, Claude invocation, response delivery). Speech provider dispatch (`_stt_transcribe()`, `_tts_convert()`) selects ElevenLabs or Speechmatics based on `SPEECH_PROVIDER` config. Entry point: `process_webhook()`.
 - `lib/telegram_api.py` — `TelegramClient` class: send messages (4096-char chunking with Markdown fallback), send voice, typing indicator, reactions, file downloads with magic byte validation. Retry on 429/5xx.
 - `lib/whatsapp_api.py` — `WhatsAppClient` class: send messages (4096-char chunking), send audio, mark read, media downloads (two-step URL resolution). Retry on 429/5xx.
 - `lib/elevenlabs.py` — ElevenLabs TTS (`tts_convert()`) and STT (`stt_transcribe()`). Stdlib only.
+- `lib/speechmatics.py` — Speechmatics TTS (`tts_convert()`) and STT (`stt_transcribe()`). TTS returns WAV audio; STT uses async batch API (submit job, poll, fetch transcript). Stdlib only.
 - `lib/claude_runner.py` — Claude CLI invocation with `start_new_session=True`, MCP config, JSON output parsing, token usage persistence. Returns `ClaudeResult` namedtuple.
 - `lib/setup.py` — Interactive setup wizards: `telegram_setup()`, `whatsapp_setup()`, `bot_setup()`. Validates credentials via API calls, polls for Telegram `/start`, generates secrets, saves config.
 - `lib/service.py` — Service management: systemd/launchd unit generation, symlink install, cloudflared tunnel setup, webhook registration with retry, cron health-check install, Claude hooks install, `service_status()`, `service_restart()`, `service_update()`, `service_install()`, `service_uninstall()`.
@@ -32,4 +33,4 @@ Claudio is a messaging-to-Claude Code bridge. It supports both Telegram and What
 
 Run locally with `./claudio start`. Requires `python3`, `sqlite3`, `cloudflared`, and `claude` CLI. The memory system optionally requires the `fastembed` Python package (degrades gracefully without it).
 
-**Tests:** `python3 -m pytest tests/` — 640 tests covering all modules (config, util, setup, service, backup, health_check, server, handlers, telegram_api, whatsapp_api, elevenlabs, claude_runner, cli).
+**Tests:** `python3 -m pytest tests/` — 673 tests covering all modules (config, util, setup, service, backup, health_check, server, handlers, telegram_api, whatsapp_api, elevenlabs, speechmatics, claude_runner, cli).
diff --git a/lib/config.py b/lib/config.py
@@ -69,9 +69,14 @@ class BotConfig:
         'whatsapp_app_secret', 'whatsapp_verify_token', 'whatsapp_phone_number',
         # Common
         'model', 'max_history_lines',
+        # Speech provider selection (from service.env)
+        'speech_provider',
         # ElevenLabs (from service.env)
         'elevenlabs_api_key', 'elevenlabs_voice_id', 'elevenlabs_model',
         'elevenlabs_stt_model',
+        # Speechmatics (from service.env)
+        'speechmatics_api_key', 'speechmatics_voice_id',
+        'speechmatics_stt_region',
         # Memory (from service.env)
         'memory_enabled',
         # Database
@@ -84,9 +89,12 @@ def __init__(self, bot_id, bot_dir=None,
                  whatsapp_app_secret='', whatsapp_verify_token='',
                  whatsapp_phone_number='',
                  model='haiku', max_history_lines=100,
+                 speech_provider='elevenlabs',
                  elevenlabs_api_key='', elevenlabs_voice_id='iP95p4xoKVk53GoZ742B',
                  elevenlabs_model='eleven_multilingual_v2',
                  elevenlabs_stt_model='scribe_v1',
+                 speechmatics_api_key='', speechmatics_voice_id='sarah',
+                 speechmatics_stt_region='eu1',
                  memory_enabled=True, db_file=''):
         self.bot_id = bot_id
         self.bot_dir = bot_dir or ''
@@ -100,10 +108,14 @@ def __init__(self, bot_id, bot_dir=None,
         self.whatsapp_phone_number = whatsapp_phone_number
         self.model = model
         self.max_history_lines = int(max_history_lines)
+        self.speech_provider = speech_provider
         self.elevenlabs_api_key = elevenlabs_api_key
         self.elevenlabs_voice_id = elevenlabs_voice_id
         self.elevenlabs_model = elevenlabs_model
         self.elevenlabs_stt_model = elevenlabs_stt_model
+        self.speechmatics_api_key = speechmatics_api_key
+        self.speechmatics_voice_id = speechmatics_voice_id
+        self.speechmatics_stt_region = speechmatics_stt_region
         self.memory_enabled = memory_enabled
         self.db_file = db_file or (os.path.join(bot_dir, 'history.db') if bot_dir else '')
 
@@ -135,11 +147,17 @@ def from_bot_config(cls, bot_id, bot_config, service_env=None):
             # Common
             model=bot_config.get('model', 'haiku'),
             max_history_lines=bot_config.get('max_history_lines', '100'),
+            # Speech provider (from service.env)
+            speech_provider=svc.get('SPEECH_PROVIDER', 'elevenlabs'),
             # ElevenLabs (from service.env)
             elevenlabs_api_key=svc.get('ELEVENLABS_API_KEY', ''),
             elevenlabs_voice_id=svc.get('ELEVENLABS_VOICE_ID', 'iP95p4xoKVk53GoZ742B'),
             elevenlabs_model=svc.get('ELEVENLABS_MODEL', 'eleven_multilingual_v2'),
             elevenlabs_stt_model=svc.get('ELEVENLABS_STT_MODEL', 'scribe_v1'),
+            # Speechmatics (from service.env)
+            speechmatics_api_key=svc.get('SPEECHMATICS_API_KEY', ''),
+            speechmatics_voice_id=svc.get('SPEECHMATICS_VOICE_ID', 'sarah'),
+            speechmatics_stt_region=svc.get('SPEECHMATICS_STT_REGION', 'eu1'),
             # Memory
             memory_enabled=svc.get('MEMORY_ENABLED', '1') == '1',
             db_file=os.path.join(bot_dir, 'history.db') if bot_dir else '',
@@ -181,11 +199,17 @@ def from_env_files(cls, bot_id, claudio_path=None):
             # Common
             model=bot_env.get('MODEL', 'haiku'),
             max_history_lines=bot_env.get('MAX_HISTORY_LINES', '100'),
+            # Speech provider (from service.env)
+            speech_provider=svc.get('SPEECH_PROVIDER', 'elevenlabs'),
             # ElevenLabs (from service.env)
             elevenlabs_api_key=svc.get('ELEVENLABS_API_KEY', ''),
             elevenlabs_voice_id=svc.get('ELEVENLABS_VOICE_ID', 'iP95p4xoKVk53GoZ742B'),
             elevenlabs_model=svc.get('ELEVENLABS_MODEL', 'eleven_multilingual_v2'),
             elevenlabs_stt_model=svc.get('ELEVENLABS_STT_MODEL', 'scribe_v1'),
+            # Speechmatics (from service.env)
+            speechmatics_api_key=svc.get('SPEECHMATICS_API_KEY', ''),
+            speechmatics_voice_id=svc.get('SPEECHMATICS_VOICE_ID', 'sarah'),
+            speechmatics_stt_region=svc.get('SPEECHMATICS_STT_REGION', 'eu1'),
             # Memory
             memory_enabled=svc.get('MEMORY_ENABLED', '1') == '1',
         )
@@ -277,8 +301,12 @@ class ClaudioConfig:
     # Keys managed in service.env (global, not per-bot)
     _MANAGED_KEYS = [
         'PORT', 'WEBHOOK_URL', 'TUNNEL_NAME', 'TUNNEL_HOSTNAME',
-        'WEBHOOK_RETRY_DELAY', 'ELEVENLABS_API_KEY', 'ELEVENLABS_VOICE_ID',
-        'ELEVENLABS_MODEL', 'ELEVENLABS_STT_MODEL', 'MEMORY_ENABLED',
+        'WEBHOOK_RETRY_DELAY', 'SPEECH_PROVIDER',
+        'ELEVENLABS_API_KEY', 'ELEVENLABS_VOICE_ID',
+        'ELEVENLABS_MODEL', 'ELEVENLABS_STT_MODEL',
+        'SPEECHMATICS_API_KEY', 'SPEECHMATICS_VOICE_ID',
+        'SPEECHMATICS_STT_REGION',
+        'MEMORY_ENABLED',
         'MEMORY_EMBEDDING_MODEL', 'MEMORY_CONSOLIDATION_MODEL',
     ]
 
@@ -295,10 +323,14 @@ class ClaudioConfig:
         'TUNNEL_NAME': '',
         'TUNNEL_HOSTNAME': '',
         'WEBHOOK_RETRY_DELAY': '60',
+        'SPEECH_PROVIDER': 'elevenlabs',
         'ELEVENLABS_API_KEY': '',
         'ELEVENLABS_VOICE_ID': 'iP95p4xoKVk53GoZ742B',
         'ELEVENLABS_MODEL': 'eleven_multilingual_v2',
         'ELEVENLABS_STT_MODEL': 'scribe_v1',
+        'SPEECHMATICS_API_KEY': '',
+        'SPEECHMATICS_VOICE_ID': 'sarah',
+        'SPEECHMATICS_STT_REGION': 'eu1',
         'MEMORY_ENABLED': '1',
         'MEMORY_EMBEDDING_MODEL': 'sentence-transformers/all-MiniLM-L6-v2',
         'MEMORY_CONSOLIDATION_MODEL': 'haiku',

diff --git a/lib/handlers.py b/lib/handlers.py
@@ -25,7 +25,8 @@
 )
 from lib.telegram_api import TelegramClient
 from lib.whatsapp_api import WhatsAppClient
-from lib.elevenlabs import tts_convert, stt_transcribe
+from lib.elevenlabs import tts_convert as elevenlabs_tts, stt_transcribe as elevenlabs_stt
+from lib.speechmatics import tts_convert as speechmatics_tts, stt_transcribe as speechmatics_stt
 from lib.claude_runner import run_claude
 
 # -- Constants --
@@ -384,6 +385,46 @@ def _memory_consolidate():
         pass
 
 
+# -- Speech provider dispatch --
+
+def _get_speech_api_key(config):
+    """Return the API key for the configured speech provider, or ''."""
+    if config.speech_provider == 'speechmatics':  # exact, case-sensitive comparison
+        return config.speechmatics_api_key
+    return config.elevenlabs_api_key  # every other provider value falls back to the ElevenLabs key
+
+
+def _stt_transcribe(audio_path, config):
+    """Transcribe audio using the configured speech provider."""
+    if config.speech_provider == 'speechmatics':  # exact match; any other value takes the ElevenLabs path below
+        return speechmatics_stt(  # provider result is returned to the caller unchanged
+            audio_path,
+            config.speechmatics_api_key,
+            region=config.speechmatics_stt_region,  # only the Speechmatics call takes a region
+        )
+    return elevenlabs_stt(  # default path: also used for unrecognised provider values
+        audio_path,
+        config.elevenlabs_api_key,
+        model=config.elevenlabs_stt_model,  # only the ElevenLabs call takes an STT model
+    )
+
+
+def _tts_convert(text, output_path, config):
+    """Convert text to speech using the configured speech provider."""
+    if config.speech_provider == 'speechmatics':  # exact match; any other value takes the ElevenLabs path below
+        return speechmatics_tts(  # provider result is returned unchanged; the caller tests it for truthiness
+            text, output_path,
+            config.speechmatics_api_key,
+            config.speechmatics_voice_id,  # no model argument is passed on the Speechmatics path
+        )
+    return elevenlabs_tts(  # default path: also used for unrecognised provider values
+        text, output_path,
+        config.elevenlabs_api_key,
+        config.elevenlabs_voice_id,
+        config.elevenlabs_model,  # the TTS model is passed only to ElevenLabs
+    )
+
+
 # -- Main entry point --
 
 def process_webhook(body, bot_id, platform, bot_config_dict):
@@ -562,10 +603,14 @@ def _process_message(msg, text, config, client, platform, bot_id):
         # -- Voice transcription --
 
         if msg.has_voice:
-            if not config.elevenlabs_api_key:
+            stt_api_key = _get_speech_api_key(config)
+            if not stt_api_key:
+                provider = config.speech_provider or 'elevenlabs'
+                key_name = ('SPEECHMATICS_API_KEY' if provider == 'speechmatics'
+                            else 'ELEVENLABS_API_KEY')
                 client.send_message(
                     msg.chat_id,
-                    f"_{voice_label.capitalize()} messages require ELEVENLABS_API_KEY "
+                    f"_{voice_label.capitalize()} messages require {key_name} "
                     f"to be configured._",
                     reply_to=msg.message_id,
                 )
@@ -593,11 +638,7 @@ def _process_message(msg, text, config, client, platform, bot_id):
                 )
                 return
 
-            transcription = stt_transcribe(
-                voice_file,
-                config.elevenlabs_api_key,
-                model=config.elevenlabs_stt_model,
-            )
+            transcription = _stt_transcribe(voice_file, config)
-            transcription = _stt_transcribe(voice_file, config)
+            transcription = sanitize_for_prompt(_stt_transcribe(voice_file, config))
-            transcription = _stt_transcribe(voice_file, config)
+            transcription = sanitize_for_prompt(_stt_transcribe(voice_file, config))
             if not transcription:
                 client.send_message(
                     msg.chat_id,
@@ -743,7 +784,7 @@ def _typing_loop():
         # -- Response delivery --
 
         if response:
-            if has_voice and config.elevenlabs_api_key:
+            if has_voice and _get_speech_api_key(config):
                 _deliver_voice_response(
                     response, config, client, msg, platform,
                     tmp_dir, tmp_files, bot_id,
@@ -787,15 +828,15 @@ def _typing_loop():
 def _deliver_voice_response(response, config, client, msg, platform,
                             tmp_dir, tmp_files, bot_id):
     """Convert response to voice/audio and send, falling back to text."""
+    tts_ext = '.wav' if config.speech_provider == 'speechmatics' else '.mp3'
     fd, tts_file = tempfile.mkstemp(
-        prefix='claudio-tts-', suffix='.mp3', dir=tmp_dir,
+        prefix='claudio-tts-', suffix=tts_ext, dir=tmp_dir,
     )
     os.close(fd)
     os.chmod(tts_file, 0o600)
     tmp_files.append(tts_file)
 
-    if tts_convert(response, tts_file, config.elevenlabs_api_key,
-                   config.elevenlabs_voice_id, config.elevenlabs_model):
+    if _tts_convert(response, tts_file, config):
         if platform == 'telegram':
             ok = client.send_voice(msg.chat_id, tts_file, reply_to=msg.message_id)
         else: