From d09bf48ee2d53de04bab7470f69b2de94897f210 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 14:11:06 -0800
Subject: [PATCH 01/16] feat: add support for soniox STT

---
 .gitignore                                    |   1 +
 backend/.env.example                          |   3 +
 backend/package-lock.json                     |  11 -
 backend/src/config/server.ts                  |  54 ++
 .../configs/flashcard-generation-graph.json   |  91 ---
 .../lang-learning-conversation-graph.json     | 289 --------
 .../configs/response-feedback-graph.json      |  91 ---
 backend/src/graphs/conversation-graph.ts      | 100 ++-
 .../graphs/nodes/assembly-ai-stt-ws-node.ts   |   3 +-
 .../src/graphs/nodes/soniox-stt-ws-node.ts    | 692 ++++++++++++++++++
 backend/src/graphs/nodes/stt-node.ts          |   9 +
 backend/src/helpers/connection-manager.ts     |   6 +-
 backend/src/server.ts                         |   3 +-
 backend/src/services/graph-service.ts         |  30 +-
 frontend/package-lock.json                    |  16 -
 render.yaml                                   |   4 +
 16 files changed, 858 insertions(+), 545 deletions(-)
 delete mode 100644 backend/src/graphs/configs/flashcard-generation-graph.json
 delete mode 100644 backend/src/graphs/configs/lang-learning-conversation-graph.json
 delete mode 100644 backend/src/graphs/configs/response-feedback-graph.json
 create mode 100644 backend/src/graphs/nodes/soniox-stt-ws-node.ts
 create mode 100644 backend/src/graphs/nodes/stt-node.ts

diff --git a/.gitignore b/.gitignore
index 6c4e867..c54347b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,6 +140,7 @@ vite.config.ts.timestamp-*
 
 # Project specific
 backend/audio/
+backend/src/graphs/configs/
 .DS_Store
 CLAUDE.md
 templates/
diff --git a/backend/.env.example b/backend/.env.example
index 2488c28..0cfaf91 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -1,5 +1,8 @@
 INWORLD_API_KEY=
+
+STT_PROVIDER=assembly
 ASSEMBLY_AI_API_KEY=
+SONIOX_API_KEY=
 
 SUPABASE_URL=
 SUPABASE_SECRET_KEY=
\ No newline at end of file
diff --git a/backend/package-lock.json b/backend/package-lock.json
index 51c3e72..0672fc4 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -1695,7 +1695,6 @@
       "integrity": "sha512-tK3GPFWbirvNgsNKto+UmB/cRtn6TZfyw0D6IKrW55n6Vbs7KJoZtI//kpTKzE/DUmmnAFD8/Ca46s7Obs92/w==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.46.4",
         "@typescript-eslint/types": "8.46.4",
@@ -2191,7 +2190,6 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -3222,7 +3220,6 @@
       "integrity": "sha512-BhHmn2yNOFA9H9JmmIVKJmd288g9hrVRDkdoIgRCRuSySRUHH7r/DI6aAXW9T1WwUuY3DFgrcaqB+deURBLR5g==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -3283,7 +3280,6 @@
       "integrity": "sha512-iI1f+D2ViGn+uvv5HuHVUamg8ll4tN+JRHGc6IJi4TP9Kl976C57fzPXgseXNs8v0iA8aSJpHsTWjDb9QJamGQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "eslint-config-prettier": "bin/cli.js"
       },
@@ -5343,7 +5339,6 @@
       "integrity": "sha512-I7AIg5boAr5R0FFtJ6rCfD+LFsWHp81dolrFD8S79U9tb8Az2nGrJncnMSnys+bpQJfRUzqs9hnA81OAA3hCuQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "prettier": "bin/prettier.cjs"
       },
@@ -6293,7 +6288,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -6394,7 +6388,6 @@
       "integrity": "sha512-ytQKuwgmrrkDTFP4LjR0ToE2nqgy886GpvRSpU0JAnrdBYppuY5rLkRUYPU1yCryb24SsKBTL/hlDQAEFVwtZg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "~0.25.0",
         "get-tsconfig": "^4.7.5"
@@ -6455,7 +6448,6 @@
       "integrity": "sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -6573,7 +6565,6 @@
       "integrity": "sha512-w+N7Hifpc3gRjZ63vYBXA56dvvRlNWRczTdmCBBa+CotUzAPf5b7YMdMR/8CQoeYE5LX3W4wj6RYTgonm1b9DA==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.27.0",
         "fdir": "^6.5.0",
@@ -7151,7 +7142,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -7165,7 +7155,6 @@
       "integrity": "sha512-E4t7DJ9pESL6E3I8nFjPa4xGUd3PmiWDLsDztS2qXSJWfHtbQnwAWylaBvSNY48I3vr8PTqIZlyK8TE3V3CA4Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@vitest/expect": "4.0.16",
         "@vitest/mocker": "4.0.16",
diff --git a/backend/src/config/server.ts b/backend/src/config/server.ts
index aa6f9ac..dd71935 100644
--- a/backend/src/config/server.ts
+++ b/backend/src/config/server.ts
@@ -5,6 +5,8 @@
  * Environment variables can override defaults where appropriate.
  */
 
+export type STTProvider = 'assembly' | 'soniox';
+
 export interface AssemblyAITurnDetectionSettings {
   endOfTurnConfidenceThreshold: number;
   minEndOfTurnSilenceWhenConfident: number;
@@ -59,12 +61,46 @@ const assemblyAIPresets: Record<
   },
 };
 
+export interface SonioxEndpointSettings {
+  maxEndpointDelayMs: number;
+  languageHints: string[];
+  description: string;
+}
+
+/**
+ * Soniox endpoint presets per eagerness level; max_endpoint_delay_ms (500-3000ms) controls how quickly endpoints are returned.
+ * NOTE(review): languageHints is ['en', 'es'] in every preset — confirm this suits all supported target languages.
+ * @see https://soniox.com/docs/stt/rt/endpoint-detection
+ */
+const sonioxPresets: Record<AssemblyAIEagerness, SonioxEndpointSettings> = {
+  high: {
+    maxEndpointDelayMs: 500,
+    languageHints: ['en', 'es'],
+    description: 'Aggressive - fastest endpoint detection (500ms)',
+  },
+  medium: {
+    maxEndpointDelayMs: 1000,
+    languageHints: ['en', 'es'],
+    description: 'Balanced - moderate endpoint delay (1000ms)',
+  },
+  low: {
+    maxEndpointDelayMs: 2000,
+    languageHints: ['en', 'es'],
+    description: 'Conservative - patient endpoint detection (2000ms)',
+  },
+};
+
 export const serverConfig = {
   /**
    * HTTP server port
    */
   port: Number(process.env.PORT) || 3000,
 
+  /**
+   * STT provider selection ('assembly' | 'soniox')
+   */
+  sttProvider: (process.env.STT_PROVIDER || 'assembly') as STTProvider,
+
   /**
    * Audio processing settings
    */
@@ -86,6 +122,15 @@ export const serverConfig = {
     formatTurns: false,
   },
 
+  /**
+   * Soniox speech-to-text configuration
+   */
+  soniox: {
+    /** Endpoint detection eagerness level (reuses the same 'low'|'medium'|'high' scale) */
+    eagerness: (process.env.SONIOX_EAGERNESS ||
+      'high') as AssemblyAIEagerness,
+  },
+
   /**
    * Telemetry configuration for Inworld Runtime
    */
@@ -111,3 +156,12 @@ export function getAssemblyAISettingsForEagerness(
 ): AssemblyAITurnDetectionSettings {
   return assemblyAIPresets[eagerness];
 }
+
+/**
+ * Get Soniox endpoint detection settings for the configured eagerness level.
+ * Reads SONIOX_EAGERNESS at call time (after dotenv loads); unset or unrecognized values fall back to 'high'.
+ */
+export function getSonioxSettings(): SonioxEndpointSettings {
+  const level = process.env.SONIOX_EAGERNESS as AssemblyAIEagerness;
+  return Object.keys(sonioxPresets).includes(level) ? sonioxPresets[level] : sonioxPresets.high;
+}
diff --git a/backend/src/graphs/configs/flashcard-generation-graph.json b/backend/src/graphs/configs/flashcard-generation-graph.json
deleted file mode 100644
index dc39045..0000000
--- a/backend/src/graphs/configs/flashcard-generation-graph.json
+++ /dev/null
@@ -1,91 +0,0 @@
-{
-  "schema_version": "1.2.2",
-  "main": {
-    "id": "flashcard-generation-graph",
-    "nodes": [
-      {
-        "type": "FlashcardPromptBuilderNodeType",
-        "id": "flashcard-prompt-builder",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "TextToChatRequestNodeType",
-        "id": "text-to-chat-request",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "id": "llm_node",
-        "type": "LLMChatNode",
-        "execution_config": {
-          "type": "LLMChatNodeExecutionConfig",
-          "properties": {
-            "llm_component_id": "llm_node_llm_component",
-            "text_generation_config": {
-              "max_new_tokens": 2500,
-              "max_prompt_length": 2000,
-              "temperature": 1,
-              "top_p": 1,
-              "repetition_penalty": 1,
-              "frequency_penalty": 0,
-              "presence_penalty": 0
-            },
-            "stream": false,
-            "report_to_client": false,
-            "response_format": "text"
-          }
-        }
-      },
-      {
-        "type": "FlashcardParserNodeType",
-        "id": "flashcard-parser",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      }
-    ],
-    "edges": [
-      {
-        "from_node": "flashcard-prompt-builder",
-        "to_node": "text-to-chat-request"
-      },
-      {
-        "from_node": "text-to-chat-request",
-        "to_node": "llm_node"
-      },
-      {
-        "from_node": "llm_node",
-        "to_node": "flashcard-parser"
-      }
-    ],
-    "end_nodes": ["flashcard-parser"],
-    "start_nodes": ["flashcard-prompt-builder"]
-  },
-  "components": [
-    {
-      "id": "llm_node_llm_component",
-      "type": "LLMInterface",
-      "creation_config": {
-        "type": "RemoteLLMConfig",
-        "properties": {
-          "provider": "openai",
-          "model_name": "gpt-4.1-nano",
-          "default_config": {},
-          "api_key": "{{INWORLD_API_KEY}}"
-        }
-      }
-    }
-  ]
-}
diff --git a/backend/src/graphs/configs/lang-learning-conversation-graph.json b/backend/src/graphs/configs/lang-learning-conversation-graph.json
deleted file mode 100644
index 09b897f..0000000
--- a/backend/src/graphs/configs/lang-learning-conversation-graph.json
+++ /dev/null
@@ -1,289 +0,0 @@
-{
-  "schema_version": "1.2.2",
-  "main": {
-    "id": "lang-learning-conversation-graph",
-    "nodes": [
-      {
-        "id": "audio-input-proxy-lang-learning",
-        "type": "ProxyNode",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "AssemblyAISTTWebSocketNodeType",
-        "id": "assembly-ai-stt-ws-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "TranscriptExtractorNodeType",
-        "id": "transcript-extractor-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": true
-          }
-        }
-      },
-      {
-        "type": "InteractionQueueNodeType",
-        "id": "interaction-queue-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "TextInputNodeType",
-        "id": "text-input-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": true
-          }
-        }
-      },
-      {
-        "type": "MemoryRetrievalNodeType",
-        "id": "memory-retrieval-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "DialogPromptBuilderNodeType",
-        "id": "dialog-prompt-builder-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "id": "llm-node-lang-learning",
-        "type": "LLMChatNode",
-        "execution_config": {
-          "type": "LLMChatNodeExecutionConfig",
-          "properties": {
-            "llm_component_id": "llm-node-lang-learning_llm_component",
-            "text_generation_config": {
-              "max_new_tokens": 250,
-              "max_prompt_length": 2000,
-              "temperature": 1,
-              "top_p": 1,
-              "repetition_penalty": 1,
-              "frequency_penalty": 0,
-              "presence_penalty": 0
-            },
-            "stream": true,
-            "report_to_client": true,
-            "response_format": "text"
-          }
-        }
-      },
-      {
-        "id": "text-chunking-node-lang-learning",
-        "type": "TextChunkingNode",
-        "execution_config": {
-          "type": "TextChunkingNodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "id": "text-aggregator-node-lang-learning",
-        "type": "TextAggregatorNode",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "TTSRequestBuilderNodeType",
-        "id": "tts-request-builder-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "id": "tts-node-lang-learning",
-        "type": "TTSNode",
-        "execution_config": {
-          "type": "TTSNodeExecutionConfig",
-          "properties": {
-            "tts_component_id": "tts-node-lang-learning_tts_component",
-            "voice": {
-              "id": "Rafael",
-              "language_code": "es-MX"
-            },
-            "synthesis_config": {
-              "type": "inworld",
-              "config": {
-                "model_id": "inworld-tts-1.5-max",
-                "inference": {
-                  "speaking_rate": 1,
-                  "temperature": 1.1
-                },
-                "postprocessing": {
-                  "sample_rate": 22050
-                }
-              }
-            },
-            "report_to_client": true
-          }
-        }
-      },
-      {
-        "type": "StateUpdateNodeType",
-        "id": "state-update-node-lang-learning",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": true
-          }
-        }
-      }
-    ],
-    "edges": [
-      {
-        "from_node": "audio-input-proxy-lang-learning",
-        "to_node": "assembly-ai-stt-ws-node-lang-learning"
-      },
-      {
-        "from_node": "assembly-ai-stt-ws-node-lang-learning",
-        "to_node": "assembly-ai-stt-ws-node-lang-learning",
-        "condition_id": "custom-condition-from-assembly-ai-stt-ws-node-lang-learning-to-assembly-ai-stt-ws-node-lang-learning",
-        "optional": true,
-        "loop": true
-      },
-      {
-        "from_node": "assembly-ai-stt-ws-node-lang-learning",
-        "to_node": "transcript-extractor-node-lang-learning",
-        "condition_id": "custom-condition-from-assembly-ai-stt-ws-node-lang-learning-to-transcript-extractor-node-lang-learning"
-      },
-      {
-        "from_node": "transcript-extractor-node-lang-learning",
-        "to_node": "interaction-queue-node-lang-learning"
-      },
-      {
-        "from_node": "interaction-queue-node-lang-learning",
-        "to_node": "text-input-node-lang-learning",
-        "condition_id": "custom-condition-from-interaction-queue-node-lang-learning-to-text-input-node-lang-learning"
-      },
-      {
-        "from_node": "text-input-node-lang-learning",
-        "to_node": "memory-retrieval-node-lang-learning"
-      },
-      {
-        "from_node": "memory-retrieval-node-lang-learning",
-        "to_node": "dialog-prompt-builder-node-lang-learning"
-      },
-      {
-        "from_node": "text-input-node-lang-learning",
-        "to_node": "tts-request-builder-node-lang-learning"
-      },
-      {
-        "from_node": "dialog-prompt-builder-node-lang-learning",
-        "to_node": "llm-node-lang-learning"
-      },
-      {
-        "from_node": "llm-node-lang-learning",
-        "to_node": "text-chunking-node-lang-learning"
-      },
-      {
-        "from_node": "llm-node-lang-learning",
-        "to_node": "text-aggregator-node-lang-learning"
-      },
-      {
-        "from_node": "text-chunking-node-lang-learning",
-        "to_node": "tts-request-builder-node-lang-learning"
-      },
-      {
-        "from_node": "tts-request-builder-node-lang-learning",
-        "to_node": "tts-node-lang-learning"
-      },
-      {
-        "from_node": "text-aggregator-node-lang-learning",
-        "to_node": "state-update-node-lang-learning"
-      },
-      {
-        "from_node": "state-update-node-lang-learning",
-        "to_node": "interaction-queue-node-lang-learning",
-        "optional": true,
-        "loop": true
-      }
-    ],
-    "end_nodes": ["tts-node-lang-learning"],
-    "start_nodes": ["audio-input-proxy-lang-learning"]
-  },
-  "components": [
-    {
-      "id": "llm-node-lang-learning_llm_component",
-      "type": "LLMInterface",
-      "creation_config": {
-        "type": "RemoteLLMConfig",
-        "properties": {
-          "provider": "openai",
-          "model_name": "gpt-4.1-nano",
-          "default_config": {},
-          "api_key": "{{INWORLD_API_KEY}}"
-        }
-      }
-    },
-    {
-      "id": "tts-node-lang-learning_tts_component",
-      "type": "TTSInterface",
-      "creation_config": {
-        "type": "RemoteTTSConfig",
-        "properties": {
-          "synthesis_config": {
-            "type": "inworld",
-            "config": {
-              "model_id": "inworld-tts-1.5-max",
-              "inference": {
-                "speaking_rate": 1,
-                "temperature": 1.1
-              },
-              "postprocessing": {
-                "sample_rate": 22050
-              }
-            }
-          },
-          "api_key": "{{INWORLD_API_KEY}}"
-        }
-      }
-    },
-    {
-      "id": "custom-condition-from-assembly-ai-stt-ws-node-lang-learning-to-assembly-ai-stt-ws-node-lang-learning",
-      "type": "custom-condition-from-assembly-ai-stt-ws-node-lang-learning-to-assembly-ai-stt-ws-node-lang-learning"
-    },
-    {
-      "id": "custom-condition-from-assembly-ai-stt-ws-node-lang-learning-to-transcript-extractor-node-lang-learning",
-      "type": "custom-condition-from-assembly-ai-stt-ws-node-lang-learning-to-transcript-extractor-node-lang-learning"
-    },
-    {
-      "id": "custom-condition-from-interaction-queue-node-lang-learning-to-text-input-node-lang-learning",
-      "type": "custom-condition-from-interaction-queue-node-lang-learning-to-text-input-node-lang-learning"
-    }
-  ]
-}
diff --git a/backend/src/graphs/configs/response-feedback-graph.json b/backend/src/graphs/configs/response-feedback-graph.json
deleted file mode 100644
index d694a2b..0000000
--- a/backend/src/graphs/configs/response-feedback-graph.json
+++ /dev/null
@@ -1,91 +0,0 @@
-{
-  "schema_version": "1.2.2",
-  "main": {
-    "id": "response-feedback-graph",
-    "nodes": [
-      {
-        "type": "FeedbackPromptBuilderNodeType",
-        "id": "feedback-prompt-builder",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "type": "TextToChatRequestNodeType",
-        "id": "text-to-chat-request",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      },
-      {
-        "id": "llm-node",
-        "type": "LLMChatNode",
-        "execution_config": {
-          "type": "LLMChatNodeExecutionConfig",
-          "properties": {
-            "llm_component_id": "llm-node_llm_component",
-            "text_generation_config": {
-              "max_new_tokens": 100,
-              "max_prompt_length": 2000,
-              "temperature": 0.7,
-              "top_p": 1,
-              "repetition_penalty": 1,
-              "frequency_penalty": 0,
-              "presence_penalty": 0
-            },
-            "stream": false,
-            "report_to_client": false,
-            "response_format": "text"
-          }
-        }
-      },
-      {
-        "type": "FeedbackExtractorNodeType",
-        "id": "feedback-extractor",
-        "execution_config": {
-          "type": "NodeExecutionConfig",
-          "properties": {
-            "report_to_client": false
-          }
-        }
-      }
-    ],
-    "edges": [
-      {
-        "from_node": "feedback-prompt-builder",
-        "to_node": "text-to-chat-request"
-      },
-      {
-        "from_node": "text-to-chat-request",
-        "to_node": "llm-node"
-      },
-      {
-        "from_node": "llm-node",
-        "to_node": "feedback-extractor"
-      }
-    ],
-    "end_nodes": ["feedback-extractor"],
-    "start_nodes": ["feedback-prompt-builder"]
-  },
-  "components": [
-    {
-      "id": "llm-node_llm_component",
-      "type": "LLMInterface",
-      "creation_config": {
-        "type": "RemoteLLMConfig",
-        "properties": {
-          "provider": "openai",
-          "model_name": "gpt-4.1-nano",
-          "default_config": {},
-          "api_key": "{{INWORLD_API_KEY}}"
-        }
-      }
-    }
-  ]
-}
diff --git a/backend/src/graphs/conversation-graph.ts b/backend/src/graphs/conversation-graph.ts
index 1c4f578..938392f 100644
--- a/backend/src/graphs/conversation-graph.ts
+++ b/backend/src/graphs/conversation-graph.ts
@@ -2,13 +2,13 @@
  * Conversation Graph for Language Learning App - Inworld Runtime 0.9
  *
  * This is a long-running circular graph that:
- * - Processes continuous audio streams via AssemblyAI STT with built-in VAD
+ * - Processes continuous audio streams via STT (AssemblyAI or Soniox) with built-in VAD
  * - Queues interactions for sequential processing
  * - Uses language-specific prompts and TTS voices
  * - Loops back for the next interaction automatically
  *
  * Graph Flow:
- * AudioInput → AssemblyAI STT (loop) → TranscriptExtractor → InteractionQueue
+ * AudioInput → STT (loop) → TranscriptExtractor → InteractionQueue
  *    → TextInput → DialogPromptBuilder → LLM → TextChunking → TTSRequestBuilder → TTS
  *    → TextAggregator → StateUpdate → (loop back to InteractionQueue)
  */
@@ -16,6 +16,7 @@
 import {
   Graph,
   GraphBuilder,
+  CustomNode,
   ProxyNode,
   RemoteLLMChatNode,
   RemoteTTSNode,
@@ -24,6 +25,8 @@ import {
 } from '@inworld/runtime/graph';
 
 import { AssemblyAISTTWebSocketNode } from './nodes/assembly-ai-stt-ws-node.js';
+import { SonioxSTTWebSocketNode } from './nodes/soniox-stt-ws-node.js';
+import { STTNode } from './nodes/stt-node.js';
 import { DialogPromptBuilderNode } from './nodes/dialog-prompt-builder-node.js';
 import { InteractionQueueNode } from './nodes/interaction-queue-node.js';
 import { MemoryRetrievalNode } from './nodes/memory-retrieval-node.js';
@@ -37,33 +40,39 @@ import {
   DEFAULT_LANGUAGE_CODE,
 } from '../config/languages.js';
 import { llmConfig } from '../config/llm.js';
-import { serverConfig, getAssemblyAISettings } from '../config/server.js';
+import {
+  serverConfig,
+  getAssemblyAISettings,
+  getSonioxSettings,
+  STTProvider,
+} from '../config/server.js';
 import { graphLogger as logger } from '../utils/logger.js';
 
 export interface ConversationGraphConfig {
-  assemblyAIApiKey: string;
+  sttProvider: STTProvider;
+  sttApiKey: string;
   connections: ConnectionsMap;
   defaultLanguageCode?: string;
 }
 
 /**
- * Wrapper class for the conversation graph
- * Provides access to the graph and the AssemblyAI node for session management
+ * Wrapper class for the conversation graph.
+ * Provides access to the graph and the STT node for session management.
  */
 export class ConversationGraphWrapper {
   graph: Graph;
-  assemblyAINode: AssemblyAISTTWebSocketNode;
+  sttNode: STTNode;
 
   private constructor(params: {
     graph: Graph;
-    assemblyAINode: AssemblyAISTTWebSocketNode;
+    sttNode: STTNode;
   }) {
     this.graph = params.graph;
-    this.assemblyAINode = params.assemblyAINode;
+    this.sttNode = params.sttNode;
   }
 
   async destroy(): Promise<void> {
-    await this.assemblyAINode.destroy();
+    await this.sttNode.destroy();
     await this.graph.stop();
   }
 
@@ -73,15 +82,19 @@ export class ConversationGraphWrapper {
   static create(config: ConversationGraphConfig): ConversationGraphWrapper {
     const {
       connections,
-      assemblyAIApiKey,
+      sttProvider,
+      sttApiKey,
       defaultLanguageCode = DEFAULT_LANGUAGE_CODE,
     } = config;
-    // Use provided language code or default to Spanish
     const langConfig = getLanguageConfig(defaultLanguageCode);
     const postfix = `-lang-learning`;
 
     logger.info(
-      { language: langConfig.name, languageCode: defaultLanguageCode },
+      {
+        language: langConfig.name,
+        languageCode: defaultLanguageCode,
+        sttProvider,
+      },
       'creating_conversation_graph'
     );
 
@@ -89,25 +102,40 @@ export class ConversationGraphWrapper {
     // Create Nodes
     // ============================================================
 
-    // Start node (audio input proxy)
     const audioInputNode = new ProxyNode({ id: `audio-input-proxy${postfix}` });
 
-    // AssemblyAI STT with built-in VAD (always uses multilingual model)
-    const turnDetectionSettings = getAssemblyAISettings();
-    const assemblyAISTTNode = new AssemblyAISTTWebSocketNode({
-      id: `assembly-ai-stt-ws-node${postfix}`,
-      config: {
-        apiKey: assemblyAIApiKey,
-        connections: connections,
-        sampleRate: serverConfig.audio.inputSampleRate,
-        formatTurns: serverConfig.assemblyAI.formatTurns,
-        endOfTurnConfidenceThreshold:
-          turnDetectionSettings.endOfTurnConfidenceThreshold,
-        minEndOfTurnSilenceWhenConfident:
-          turnDetectionSettings.minEndOfTurnSilenceWhenConfident,
-        maxTurnSilence: turnDetectionSettings.maxTurnSilence,
-      },
-    });
+    // Create STT node based on provider
+    let sttCustomNode: CustomNode & STTNode;
+
+    if (sttProvider === 'soniox') {
+      const sonioxSettings = getSonioxSettings();
+      sttCustomNode = new SonioxSTTWebSocketNode({
+        id: `stt-ws-node${postfix}`,
+        config: {
+          apiKey: sttApiKey,
+          connections: connections,
+          sampleRate: serverConfig.audio.inputSampleRate,
+          maxEndpointDelayMs: sonioxSettings.maxEndpointDelayMs,
+          languageHints: sonioxSettings.languageHints,
+        },
+      });
+    } else {
+      const turnDetectionSettings = getAssemblyAISettings();
+      sttCustomNode = new AssemblyAISTTWebSocketNode({
+        id: `stt-ws-node${postfix}`,
+        config: {
+          apiKey: sttApiKey,
+          connections: connections,
+          sampleRate: serverConfig.audio.inputSampleRate,
+          formatTurns: serverConfig.assemblyAI.formatTurns,
+          endOfTurnConfidenceThreshold:
+            turnDetectionSettings.endOfTurnConfidenceThreshold,
+          minEndOfTurnSilenceWhenConfident:
+            turnDetectionSettings.minEndOfTurnSilenceWhenConfident,
+          maxTurnSilence: turnDetectionSettings.maxTurnSilence,
+        },
+      });
+    }
 
     const transcriptExtractorNode = new TranscriptExtractorNode({
       id: `transcript-extractor-node${postfix}`,
@@ -190,7 +218,7 @@ export class ConversationGraphWrapper {
     graphBuilder
       // Add all nodes
       .addNode(audioInputNode)
-      .addNode(assemblyAISTTNode)
+      .addNode(sttCustomNode)
       .addNode(transcriptExtractorNode)
       .addNode(interactionQueueNode)
       .addNode(textInputNode)
@@ -206,10 +234,10 @@ export class ConversationGraphWrapper {
       // ============================================================
       // Audio Input Flow (STT with VAD)
       // ============================================================
-      .addEdge(audioInputNode, assemblyAISTTNode)
+      .addEdge(audioInputNode, sttCustomNode)
 
-      // AssemblyAI loops back to itself while stream is active
-      .addEdge(assemblyAISTTNode, assemblyAISTTNode, {
+      // STT loops back to itself while stream is active
+      .addEdge(sttCustomNode, sttCustomNode, {
         condition: async (input: unknown) => {
           const data = input as { stream_exhausted?: boolean };
           return data?.stream_exhausted !== true;
@@ -219,7 +247,7 @@ export class ConversationGraphWrapper {
       })
 
       // When interaction is complete, extract transcript
-      .addEdge(assemblyAISTTNode, transcriptExtractorNode, {
+      .addEdge(sttCustomNode, transcriptExtractorNode, {
         condition: async (input: unknown) => {
           const data = input as { interaction_complete?: boolean };
           return data?.interaction_complete === true;
@@ -283,7 +311,7 @@ export class ConversationGraphWrapper {
 
     return new ConversationGraphWrapper({
       graph,
-      assemblyAINode: assemblyAISTTNode,
+      sttNode: sttCustomNode,
     });
   }
 }
diff --git a/backend/src/graphs/nodes/assembly-ai-stt-ws-node.ts b/backend/src/graphs/nodes/assembly-ai-stt-ws-node.ts
index 3b566e9..3d0169f 100644
--- a/backend/src/graphs/nodes/assembly-ai-stt-ws-node.ts
+++ b/backend/src/graphs/nodes/assembly-ai-stt-ws-node.ts
@@ -6,6 +6,7 @@ import { v4 as uuidv4 } from 'uuid';
 import { Connection } from '../../types/index.js';
 import { audioDataToPCM16 } from '../../helpers/audio-utils.js';
 import { createLogger } from '../../utils/logger.js';
+import { STTNode } from './stt-node.js';
 
 const logger = createLogger('AssemblyAI');
 
@@ -224,7 +225,7 @@ class AssemblyAISession {
  * - Detects turn endings using Assembly.AI's neural turn detection
  * - Returns DataStreamWithMetadata with transcribed text when a turn completes
  */
-export class AssemblyAISTTWebSocketNode extends CustomNode {
+export class AssemblyAISTTWebSocketNode extends CustomNode implements STTNode {
   private apiKey: string;
   private connections: { [sessionId: string]: Connection };
   private sampleRate: number;
diff --git a/backend/src/graphs/nodes/soniox-stt-ws-node.ts b/backend/src/graphs/nodes/soniox-stt-ws-node.ts
new file mode 100644
index 0000000..e78ce48
--- /dev/null
+++ b/backend/src/graphs/nodes/soniox-stt-ws-node.ts
@@ -0,0 +1,692 @@
+import { DataStreamWithMetadata } from '@inworld/runtime';
+import { CustomNode, GraphTypes, ProcessContext } from '@inworld/runtime/graph';
+import WebSocket from 'ws';
+import { v4 as uuidv4 } from 'uuid';
+
+import { Connection } from '../../types/index.js';
+import { audioDataToPCM16 } from '../../helpers/audio-utils.js';
+import { createLogger } from '../../utils/logger.js';
+import { STTNode } from './stt-node.js';
+
+const logger = createLogger('Soniox');
+
+const SONIOX_WEBSOCKET_URL = 'wss://stt-rt.soniox.com/transcribe-websocket';
+const SONIOX_MODEL = 'stt-rt-v4';
+
+/**
+ * Configuration interface for SonioxSTTWebSocketNode
+ */
+export interface SonioxSTTWebSocketNodeConfig {
+  /** Soniox API key (required: the node constructor throws when it is empty) */
+  apiKey: string;
+  /** Connections map keyed by session id, used to access session state (required) */
+  connections: { [sessionId: string]: Connection };
+  /** Sample rate of the audio stream in Hz (default 16000) */
+  sampleRate?: number;
+  /** Maximum endpoint delay in milliseconds (500-3000, default 2000) */
+  maxEndpointDelayMs?: number;
+  /** Language hints for improved accuracy, e.g. ['en', 'es'] (default: no hints) */
+  languageHints?: string[];
+}
+
+/**
+ * Owns one session's Soniox WebSocket plus its keepalive and inactivity timers.
+ */
+class SonioxSession {
+  private ws: WebSocket | null = null;
+  private wsReady: boolean = false; // true once the config frame went out on an open socket
+  private wsConnectionPromise: Promise<void> | null = null;
+
+  public shouldStopProcessing: boolean = false; // reset to false by ensureConnection()
+
+  private inactivityTimeout: NodeJS.Timeout | null = null;
+  private keepaliveInterval: NodeJS.Timeout | null = null;
+  private lastActivityTime: number = Date.now();
+  private readonly INACTIVITY_TIMEOUT_MS = 60000;
+  private readonly KEEPALIVE_INTERVAL_MS = 5000;
+
+  constructor(
+    public readonly sessionId: string,
+    private apiKey: string,
+    private sampleRate: number,
+    private maxEndpointDelayMs: number,
+    private languageHints: string[]
+  ) {}
+
+  /**
+   * Reconnects if the socket is missing or not open, waits for the connection,
+   * then clears the stop flag and restarts the inactivity timer for a new turn.
+   */
+  public async ensureConnection(): Promise<void> {
+    if (!this.ws || !this.wsReady || this.ws.readyState !== WebSocket.OPEN) {
+      this.closeWebSocket();
+      this.initializeWebSocket();
+    }
+
+    if (this.wsConnectionPromise) {
+      await this.wsConnectionPromise;
+    }
+
+    this.shouldStopProcessing = false;
+    this.resetInactivityTimer();
+  }
+
+  /** Dials Soniox and stores a promise that resolves on 'open' and rejects on 'error'. */
+  private initializeWebSocket(): void {
+    logger.debug({ sessionId: this.sessionId }, 'initializing_websocket');
+
+    this.wsConnectionPromise = new Promise<void>((resolve, reject) => {
+      const ws = new WebSocket(SONIOX_WEBSOCKET_URL);
+      this.ws = ws;
+      ws.on('open', () => {
+        logger.debug({ sessionId: this.sessionId }, 'websocket_opened');
+        const config = {
+          api_key: this.apiKey,
+          model: SONIOX_MODEL,
+          audio_format: 'pcm_s16le',
+          sample_rate: this.sampleRate,
+          num_channels: 1,
+          enable_endpoint_detection: true,
+          max_endpoint_delay_ms: this.maxEndpointDelayMs,
+          language_hints: this.languageHints,
+          enable_language_identification: true,
+        };
+
+        ws.send(JSON.stringify(config)); // the config frame goes out before any audio
+        logger.debug(
+          {
+            model: SONIOX_MODEL,
+            sampleRate: this.sampleRate,
+            maxEndpointDelayMs: this.maxEndpointDelayMs,
+            languageHints: this.languageHints,
+          },
+          'config_sent'
+        );
+
+        this.wsReady = true;
+        this.startKeepalive();
+        resolve();
+      });
+
+      ws.on('error', (error: Error) => {
+        logger.error({ err: error }, 'websocket_error');
+        this.wsReady = false;
+        reject(error);
+      });
+
+      ws.on('close', (code: number, reason: Buffer) => {
+        logger.debug({ code, reason: reason.toString() }, 'websocket_closed');
+        this.wsReady = false;
+        this.stopKeepalive();
+      });
+    });
+  }
+
+  /** Subscribes `listener` to frames on the current socket; no-op without a socket. */
+  public onMessage(listener: (data: WebSocket.Data) => void): void {
+    // Bound to this socket object only: a reconnect creates a new socket without it.
+    this.ws?.on('message', listener);
+  }
+
+  /** Removes a listener previously added with onMessage(); no-op without a socket. */
+  public offMessage(listener: (data: WebSocket.Data) => void): void {
+    // A socket closed through closeWebSocket() has already dropped all its listeners.
+    this.ws?.off('message', listener);
+  }
+
+  /** Streams one PCM16 chunk to Soniox; chunks are dropped while the socket is not open. */
+  public sendAudio(pcm16Data: Int16Array): void {
+    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+    // A typed array can be a window onto a larger ArrayBuffer: send only its own bytes.
+    this.ws.send(Buffer.from(pcm16Data.buffer, pcm16Data.byteOffset, pcm16Data.byteLength));
+    this.resetInactivityTimer();
+  }
+
+  /** Sends a `finalize` control frame; no-op unless the socket is open. */
+  public sendFinalize(): void {
+    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+    this.ws.send(JSON.stringify({ type: 'finalize' }));
+  }
+
+  /** (Re)starts the periodic `keepalive` frame, which is sent only while the socket is open. */
+  private startKeepalive(): void {
+    this.stopKeepalive();
+    this.keepaliveInterval = setInterval(() => {
+      if (!this.ws || this.ws.readyState !== WebSocket.OPEN) return;
+      this.ws.send(JSON.stringify({ type: 'keepalive' }));
+    }, this.KEEPALIVE_INTERVAL_MS);
+  }
+
+  /** Stops the keepalive interval if one is running. */
+  private stopKeepalive(): void {
+    if (!this.keepaliveInterval) return;
+    clearInterval(this.keepaliveInterval);
+    this.keepaliveInterval = null;
+  }
+
+  /** Records activity now and restarts the countdown that closes an idle socket. */
+  private resetInactivityTimer(): void {
+    if (this.inactivityTimeout) clearTimeout(this.inactivityTimeout);
+    this.lastActivityTime = Date.now();
+    this.inactivityTimeout = setTimeout(() => {
+      this.closeDueToInactivity();
+    }, this.INACTIVITY_TIMEOUT_MS);
+  }
+
+  /** Cancels the idle countdown without touching the socket. */
+  public clearInactivityTimer(): void {
+    if (!this.inactivityTimeout) return;
+    clearTimeout(this.inactivityTimeout);
+    this.inactivityTimeout = null;
+  }
+
+  /** Inactivity-timer callback: logs the idle duration and closes the socket. */
+  private closeDueToInactivity(): void {
+    const inactiveMs = Date.now() - this.lastActivityTime;
+    logger.info(
+      { sessionId: this.sessionId, inactiveMs },
+      'closing_due_to_inactivity'
+    );
+    this.closeWebSocket();
+  }
+
+  /** Detaches all listeners and shuts the socket down; safe to call when none exists. */
+  private closeWebSocket(): void {
+    this.stopKeepalive();
+    if (!this.ws) return;
+    try {
+      this.ws.removeAllListeners();
+      this.ws.on('error', () => {}); // a late 'error' event with no listener would throw
+      if (this.ws.readyState === WebSocket.CONNECTING) this.ws.terminate(); // abort handshake
+      if (this.ws.readyState === WebSocket.OPEN) {
+        this.ws.send(''); // empty frame signals end-of-audio
+        this.ws.close();
+      }
+    } catch (e) {
+      logger.warn({ err: e }, 'error_closing_socket');
+    }
+    this.ws = null;
+    this.wsReady = false;
+  }
+
+  /** Graceful shutdown: cancel the idle timer, signal end-of-audio, wait briefly, close. */
+  public async close(): Promise<void> {
+    this.clearInactivityTimer();
+    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
+      try {
+        this.ws.send(''); // empty frame signals end-of-audio
+        // Short grace period before the socket is torn down below.
+        await new Promise((resolve) => setTimeout(resolve, 100));
+      } catch {
+        // Best effort: the socket is closed below regardless.
+      }
+    }
+
+    this.closeWebSocket();
+  }
+}
+
+/**
+ * SonioxSTTWebSocketNode processes continuous multimodal streams using Soniox's
+ * streaming Speech-to-Text service via direct WebSocket connection.
+ *
+ * This node:
+ * - Receives a MultimodalContent stream (audio and/or text)
+ * - For audio: converts each chunk to PCM16 and sends it to a per-session Soniox socket
+ * - For text: bypasses STT and returns the text directly
+ * - Detects turn endings from Soniox's final `<end>` token (endpoint detection)
+ * - Returns a DataStreamWithMetadata whose metadata carries the transcript and turn flags
+ */
+export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
+  private apiKey: string;
+  private connections: { [sessionId: string]: Connection };
+  private sampleRate: number;
+  private maxEndpointDelayMs: number;
+  private languageHints: string[];
+
+  private sessions: Map<string, SonioxSession> = new Map(); // one SonioxSession per sessionId, reused across process() calls
+  private readonly TURN_COMPLETION_TIMEOUT_MS = 2000; // how long to wait for a turn after the audio loop ends
+  private readonly MAX_TRANSCRIPTION_DURATION_MS = 40000; // cap on one process() call while no transcript exists
+  /** @throws Error when config.apiKey or config.connections is missing. */
+  constructor(props: {
+    id?: string;
+    config: SonioxSTTWebSocketNodeConfig;
+  }) {
+    const { config, ...nodeProps } = props;
+
+    if (!config.apiKey) {
+      throw new Error('SonioxSTTWebSocketNode requires an API key.');
+    }
+    if (!config.connections) {
+      throw new Error('SonioxSTTWebSocketNode requires a connections object.');
+    }
+
+    super({ id: nodeProps.id || 'soniox-stt-ws-node' });
+
+    this.apiKey = config.apiKey;
+    this.connections = config.connections;
+    this.sampleRate = config.sampleRate || 16000; // any falsy value (including 0) falls back to 16 kHz
+    this.maxEndpointDelayMs = config.maxEndpointDelayMs ?? 2000;
+    this.languageHints = config.languageHints ?? [];
+
+    logger.info(
+      {
+        maxEndpointDelayMs: this.maxEndpointDelayMs,
+        languageHints: this.languageHints,
+      },
+      'stt_node_configured'
+    );
+  }
+  /** Runs one turn: feeds the stream to Soniox until an endpoint, a text input, stream end or the duration cap. */
+  async process(
+    context: ProcessContext,
+    input0: AsyncIterableIterator<GraphTypes.MultimodalContent>,
+    input: DataStreamWithMetadata
+  ): Promise<DataStreamWithMetadata> {
+    const multimodalStream =
+      input !== undefined &&
+      input !== null &&
+      input instanceof DataStreamWithMetadata
+        ? (input.toStream() as unknown as AsyncIterableIterator<GraphTypes.MultimodalContent>)
+        : input0; // use input's stream when it is a DataStreamWithMetadata, otherwise input0
+
+    const sessionId = context.getDatastore().get('sessionId') as string;
+    const connection = this.connections[sessionId];
+
+    if (connection?.unloaded) {
+      throw Error(`Session unloaded for sessionId: ${sessionId}`);
+    }
+    if (!connection) {
+      throw Error(`Failed to read connection for sessionId: ${sessionId}`);
+    }
+
+    const metadata = input?.getMetadata?.() || {};
+    let previousIteration = (metadata.iteration as number) || 0; // 0 when the metadata carries no iteration
+
+    if (
+      !connection.state.interactionId ||
+      connection.state.interactionId === ''
+    ) {
+      connection.state.interactionId = uuidv4(); // start a new interaction
+    }
+
+    const currentId = connection.state.interactionId;
+    const delimiterIndex = currentId.indexOf('#');
+
+    if (previousIteration === 0 && delimiterIndex !== -1) { // recover the iteration from a "<base>#<n>" id
+      const iterationStr = currentId.substring(delimiterIndex + 1);
+      const parsedIteration = parseInt(iterationStr, 10);
+      if (!isNaN(parsedIteration) && /^\d+$/.test(iterationStr)) {
+        previousIteration = parsedIteration;
+      }
+    }
+
+    const iteration = previousIteration + 1;
+    const baseId =
+      delimiterIndex !== -1
+        ? currentId.substring(0, delimiterIndex)
+        : currentId;
+    const nextInteractionId = `${baseId}#${iteration}`;
+
+    logger.debug({ iteration }, 'starting_transcription');
+
+    // Per-call state: written by the message handler and the audio loop below
+    let transcriptText = '';
+    let turnDetected = false;
+    let speechDetected = false;
+    let audioChunkCount = 0;
+    let totalAudioSamples = 0;
+    let isStreamExhausted = false;
+    let errorOccurred = false;
+    let errorMessage = '';
+    let maxDurationReached = false;
+    let isTextInput = false;
+    let textContent: string | undefined;
+
+    // Text of every final token received so far in this call
+    let finalTokenTexts: string[] = [];
+
+    // Get or create the per-session Soniox socket wrapper
+    let session = this.sessions.get(sessionId);
+    if (!session) {
+      session = new SonioxSession(
+        sessionId,
+        this.apiKey,
+        this.sampleRate,
+        this.maxEndpointDelayMs,
+        this.languageHints
+      );
+      this.sessions.set(sessionId, session);
+    }
+
+    // Resolved with the transcript when a turn ends (endpoint detected or text input)
+    let turnResolve: (value: string) => void = () => {};
+    let turnReject: (error: Error) => void = () => {};
+    let turnCompleted = false;
+    const turnPromise = new Promise<string>((resolve, reject) => {
+      turnResolve = resolve;
+      turnReject = reject;
+    });
+    const turnPromiseWithState = turnPromise.then((value) => {
+      turnCompleted = true;
+      return value;
+    });
+
+    // Handles one Soniox frame; attached only for the duration of this process() call
+    const messageHandler = (data: WebSocket.Data) => {
+      try {
+        const message = JSON.parse(data.toString());
+
+        if (message.error_code) {
+          logger.error(
+            { code: message.error_code, msg: message.error_message },
+            'soniox_error'
+          );
+          errorOccurred = true;
+          errorMessage = `${message.error_code}: ${message.error_message}`;
+          return; // only recorded for the result metadata; the audio loop is not stopped here
+        }
+
+        if (session?.shouldStopProcessing) {
+          return; // the turn already ended: ignore late frames
+        }
+
+        const tokens = message.tokens;
+        if (!tokens || !Array.isArray(tokens) || tokens.length === 0) {
+          return;
+        }
+
+        let endpointDetected = false;
+        const nonFinalTexts: string[] = [];
+
+        for (const token of tokens) {
+          const text = token.text || '';
+
+          if (token.is_final) {
+            // <end> token signals endpoint detection
+            if (text === '<end>') {
+              endpointDetected = true;
+            } else {
+              finalTokenTexts.push(text);
+            }
+          } else {
+            nonFinalTexts.push(text);
+          }
+        }
+
+        // Trigger speech detected on first meaningful text
+        if (!speechDetected && (nonFinalTexts.length > 0 || finalTokenTexts.length > 0)) {
+          const hasText = nonFinalTexts.some((t) => t.trim().length > 0) ||
+            finalTokenTexts.some((t) => t.trim().length > 0);
+          if (hasText) {
+            speechDetected = true;
+            logger.debug({ iteration }, 'speech_detected');
+            if (connection?.onSpeechDetected) {
+              connection.onSpeechDetected(nextInteractionId);
+            }
+          }
+        }
+
+        // Partial transcript = all final tokens so far + this frame's non-final tokens
+        if (nonFinalTexts.length > 0) {
+          const partialText = [...finalTokenTexts, ...nonFinalTexts].join('').trim();
+          if (partialText) {
+            this.sendPartialTranscript(
+              sessionId,
+              nextInteractionId,
+              partialText
+            );
+          }
+        }
+
+        if (endpointDetected) {
+          let finalTranscript = finalTokenTexts.join('').trim();
+
+          // Check for pending transcript to stitch
+          if (connection?.pendingTranscript) {
+            finalTranscript =
+              `${connection.pendingTranscript} ${finalTranscript}`.trim();
+            logger.debug(
+              {
+                iteration,
+                transcriptSnippet: finalTranscript.substring(0, 80),
+              },
+              'stitched_transcript'
+            );
+            connection.pendingTranscript = undefined;
+          } else {
+            logger.debug(
+              { iteration, transcriptSnippet: finalTranscript.substring(0, 50) },
+              'endpoint_detected'
+            );
+          }
+
+          if (connection) {
+            connection.isProcessingInterrupted = false;
+          }
+
+          transcriptText = finalTranscript;
+          turnDetected = true;
+          if (session) session.shouldStopProcessing = true; // makes the audio loop exit at its next check
+          turnResolve(finalTranscript);
+        }
+      } catch (error) {
+        logger.error({ err: error }, 'error_handling_message');
+      }
+    };
+
+    try {
+      await session.ensureConnection();
+      session.onMessage(messageHandler);
+
+      const audioProcessingPromise = (async () => {
+        let maxDurationTimeout: NodeJS.Timeout | null = null;
+        try {
+          maxDurationTimeout = setTimeout(() => {
+            maxDurationReached = true; // only a flag: the loop tests it between chunks
+          }, this.MAX_TRANSCRIPTION_DURATION_MS);
+
+          while (true) {
+            if (session?.shouldStopProcessing) break;
+
+            if (maxDurationReached && !transcriptText) {
+              logger.warn(
+                { maxDurationMs: this.MAX_TRANSCRIPTION_DURATION_MS },
+                'max_transcription_duration_reached'
+              );
+              break;
+            }
+
+            const result = await multimodalStream.next(); // waits for the next chunk or the end of the stream
+
+            if (result.done) {
+              logger.debug(
+                { iteration, audioChunkCount },
+                'multimodal_stream_exhausted'
+              );
+              isStreamExhausted = true;
+              break;
+            }
+
+            if (session?.shouldStopProcessing) break; // a chunk read after the turn ended is not forwarded
+
+            const content = result.value as GraphTypes.MultimodalContent;
+
+            // Handle text input
+            if (content.text !== undefined && content.text !== null) {
+              logger.debug(
+                { iteration, textSnippet: content.text.substring(0, 50) },
+                'text_input_detected'
+              );
+              isTextInput = true;
+              textContent = content.text;
+              transcriptText = content.text;
+              turnDetected = true;
+              if (session) {
+                session.shouldStopProcessing = true;
+                session.clearInactivityTimer();
+              }
+              turnResolve(transcriptText);
+              break;
+            }
+
+            // Extract audio
+            if (content.audio === undefined || content.audio === null) continue;
+
+            const audioData = content.audio.data;
+            if (!audioData || audioData.length === 0) continue;
+
+            audioChunkCount++;
+            totalAudioSamples += audioData.length;
+
+            const pcm16Data = audioDataToPCM16(audioData);
+            session?.sendAudio(pcm16Data);
+          }
+        } catch (error) {
+          logger.error({ err: error }, 'error_processing_audio');
+          errorOccurred = true;
+          errorMessage = error instanceof Error ? error.message : String(error);
+          throw error;
+        } finally {
+          if (maxDurationTimeout) {
+            clearTimeout(maxDurationTimeout);
+          }
+        }
+      })();
+
+      const raceResult = await Promise.race([ // whichever comes first: a detected turn or the end of the audio loop
+        turnPromiseWithState.then(() => ({ winner: 'turn' as const })),
+        audioProcessingPromise.then(() => ({ winner: 'audio' as const })),
+      ]);
+
+      if (
+        raceResult.winner === 'audio' &&
+        !turnCompleted &&
+        !maxDurationReached
+      ) {
+        logger.debug(
+          { waitMs: this.TURN_COMPLETION_TIMEOUT_MS },
+          'audio_ended_before_turn_waiting'
+        );
+
+        // Send finalize to force Soniox to return any remaining tokens
+        session.sendFinalize();
+
+        const timeoutPromise = new Promise<{ winner: 'timeout' }>((resolve) =>
+          setTimeout(
+            () => resolve({ winner: 'timeout' }),
+            this.TURN_COMPLETION_TIMEOUT_MS
+          )
+        );
+
+        const waitResult = await Promise.race([
+          turnPromiseWithState.then(() => ({ winner: 'turn' as const })),
+          timeoutPromise,
+        ]);
+
+        if (waitResult.winner === 'timeout' && !turnCompleted) {
+          logger.warn('timed_out_waiting_for_turn');
+          turnReject?.(new Error('Timed out waiting for turn completion')); // not awaited again; the result below reports turn_detected: false
+        }
+      }
+
+      await audioProcessingPromise.catch(() => {}); // let the loop finish; a failure is already in errorOccurred/errorMessage
+
+      logger.debug(
+        { iteration, transcriptSnippet: transcriptText?.substring(0, 50) },
+        'transcription_complete'
+      );
+
+      if (turnDetected) {
+        connection.state.interactionId = ''; // the next call generates a fresh interaction id
+      }
+
+      const taggedStream = Object.assign(multimodalStream, { // same stream object, tagged in place
+        type: 'MultimodalContent',
+        abort: () => {},
+        getMetadata: () => ({}),
+      });
+
+      return new DataStreamWithMetadata(taggedStream, {
+        elementType: 'MultimodalContent',
+        iteration: iteration,
+        interactionId: nextInteractionId,
+        session_id: sessionId,
+        transcript: transcriptText,
+        turn_detected: turnDetected,
+        audio_chunk_count: audioChunkCount,
+        total_audio_samples: totalAudioSamples,
+        sample_rate: this.sampleRate,
+        stream_exhausted: isStreamExhausted,
+        interaction_complete: turnDetected && transcriptText.length > 0, // an empty transcript never completes an interaction
+        error_occurred: errorOccurred,
+        error_message: errorMessage,
+        is_text_input: isTextInput,
+        text_content: textContent,
+      });
+    } catch (error) { // connection or audio-loop failure: reported through metadata instead of thrown
+      logger.error({ err: error, iteration }, 'transcription_failed');
+
+      const taggedStream = Object.assign(multimodalStream, {
+        type: 'MultimodalContent',
+        abort: () => {},
+        getMetadata: () => ({}),
+      });
+
+      return new DataStreamWithMetadata(taggedStream, {
+        elementType: 'MultimodalContent',
+        iteration: iteration,
+        interactionId: nextInteractionId,
+        session_id: sessionId,
+        transcript: '',
+        turn_detected: false,
+        stream_exhausted: isStreamExhausted,
+        interaction_complete: false,
+        error_occurred: true,
+        error_message: error instanceof Error ? error.message : String(error),
+        is_text_input: isTextInput,
+        text_content: textContent,
+      });
+    } finally {
+      if (session) {
+        session.offMessage(messageHandler); // always detach this call's handler
+      }
+    }
+  }
+  /** Forwards a partial transcript to the connection's onPartialTranscript callback, if set; callback errors are logged. */
+  private sendPartialTranscript(
+    sessionId: string,
+    interactionId: string,
+    text: string
+  ): void {
+    const connection = this.connections[sessionId];
+    if (!connection?.onPartialTranscript) return;
+
+    try {
+      connection.onPartialTranscript(text, interactionId);
+    } catch (error) {
+      logger.error({ err: error }, 'error_sending_partial_transcript');
+    }
+  }
+  /** Closes and forgets the Soniox session for `sessionId`; no-op when none exists. */
+  async closeSession(sessionId: string): Promise<void> {
+    const session = this.sessions.get(sessionId);
+    if (session) {
+      logger.debug({ sessionId }, 'closing_session');
+      await session.close();
+      this.sessions.delete(sessionId);
+    }
+  }
+  /** Closes every session concurrently, then clears the session map. */
+  async destroy(): Promise<void> {
+    logger.info({ sessionCount: this.sessions.size }, 'destroying_node');
+
+    const promises: Promise<void>[] = [];
+    for (const session of this.sessions.values()) {
+      promises.push(session.close());
+    }
+
+    await Promise.all(promises);
+    this.sessions.clear();
+  }
+}
diff --git a/backend/src/graphs/nodes/stt-node.ts b/backend/src/graphs/nodes/stt-node.ts
new file mode 100644
index 0000000..2d58b62
--- /dev/null
+++ b/backend/src/graphs/nodes/stt-node.ts
@@ -0,0 +1,9 @@
+/**
+ * Common lifecycle interface for STT (Speech-to-Text) nodes.
+ * Both the AssemblyAI and Soniox nodes implement it, so callers can clean up
+ * whichever provider the conversation graph was built with.
+ */
+export interface STTNode {
+  closeSession(sessionId: string): Promise<void>; // release the provider resources held for one session
+  destroy(): Promise<void>; // close every open session
+}
diff --git a/backend/src/helpers/connection-manager.ts b/backend/src/helpers/connection-manager.ts
index 9659d5a..0a8f99c 100644
--- a/backend/src/helpers/connection-manager.ts
+++ b/backend/src/helpers/connection-manager.ts
@@ -4,7 +4,7 @@
  * This replaces the AudioProcessor for Inworld Runtime 0.9.
  * Key differences from AudioProcessor:
  * - Uses MultimodalStreamManager to feed audio to a long-running graph
- * - VAD is handled inside the graph by AssemblyAI (not external Silero)
+ * - VAD is handled inside the graph by the STT provider (AssemblyAI or Soniox)
  * - Graph runs continuously for the session duration
  */
 
@@ -1085,8 +1085,8 @@ export class ConnectionManager {
     // End the multimodal stream
     this.multimodalStreamManager.end();
 
-    // Close AssemblyAI session
-    await this.graphWrapper.assemblyAINode.closeSession(this.sessionId);
+    // Close STT session
+    await this.graphWrapper.sttNode.closeSession(this.sessionId);
 
     // Remove from connections map
     delete this.connections[this.sessionId];
diff --git a/backend/src/server.ts b/backend/src/server.ts
index 90fc1d3..0b61b34 100644
--- a/backend/src/server.ts
+++ b/backend/src/server.ts
@@ -105,9 +105,10 @@ async function startServer(): Promise<void> {
   try {
     await initializeGraph();
     await exportGraphConfigs();
+    const sttProvider = process.env.STT_PROVIDER || 'assembly';
     server.listen(serverConfig.port, () => {
       logger.info({ port: serverConfig.port }, 'server_started');
-      logger.info('using_inworld_runtime_0.9_with_assemblyai_stt');
+      logger.info({ sttProvider }, 'using_inworld_runtime_0.9_with_stt');
     });
   } catch (error) {
     logger.fatal({ err: error }, 'server_start_failed');
diff --git a/backend/src/services/graph-service.ts b/backend/src/services/graph-service.ts
index 1bfd910..f0cad43 100644
--- a/backend/src/services/graph-service.ts
+++ b/backend/src/services/graph-service.ts
@@ -19,6 +19,7 @@ import { getResponseFeedbackGraph } from '../graphs/response-feedback-graph.js';
 import { initializeTTSGraphs } from '../graphs/simple-tts-graph.js';
 import { serverLogger as logger } from '../utils/logger.js';
 import { connections } from './state.js';
+import { STTProvider } from '../config/server.js';
 
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -31,16 +32,33 @@ export function getGraphWrapper(): ConversationGraphWrapper | null {
 }
 
 export async function initializeGraph(): Promise<void> {
-  const assemblyAIApiKey = process.env.ASSEMBLY_AI_API_KEY;
-  if (!assemblyAIApiKey) {
-    throw new Error('ASSEMBLY_AI_API_KEY environment variable is required');
+  // Read STT_PROVIDER from process.env at call time (after dotenv has loaded),
+  // not from serverConfig which is evaluated at module load time before dotenv.
+  const sttProvider = (process.env.STT_PROVIDER || 'assembly') as STTProvider;
+  let sttApiKey: string;
+
+  if (sttProvider === 'soniox') {
+    sttApiKey = process.env.SONIOX_API_KEY || '';
+    if (!sttApiKey) {
+      throw new Error(
+        'SONIOX_API_KEY environment variable is required when STT_PROVIDER=soniox'
+      );
+    }
+  } else {
+    sttApiKey = process.env.ASSEMBLY_AI_API_KEY || '';
+    if (!sttApiKey) {
+      throw new Error(
+        'ASSEMBLY_AI_API_KEY environment variable is required when STT_PROVIDER=assembly'
+      );
+    }
   }
 
-  logger.info('initializing_conversation_graph');
+  logger.info({ sttProvider }, 'initializing_conversation_graph');
   graphWrapper = getConversationGraph({
-    assemblyAIApiKey,
+    sttProvider,
+    sttApiKey,
     connections,
-    defaultLanguageCode: 'es', // Always Spanish
+    defaultLanguageCode: 'es',
   });
   logger.info('conversation_graph_initialized');
 
diff --git a/frontend/package-lock.json b/frontend/package-lock.json
index 5d0a7b8..8b55696 100644
--- a/frontend/package-lock.json
+++ b/frontend/package-lock.json
@@ -84,7 +84,6 @@
       "integrity": "sha512-e7jT4DxYvIDLk1ZHmU/m/mB19rex9sv0c2ftBtjSBv+kVM/902eh0fINUzD7UwLLNR+jU585GxUJ8/EBfAM5fw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@babel/code-frame": "^7.27.1",
         "@babel/generator": "^7.28.5",
@@ -424,7 +423,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18"
       },
@@ -448,7 +446,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=18"
       }
@@ -1667,7 +1664,6 @@
       "integrity": "sha512-MWtvHrGZLFttgeEj28VXHxpmwYbor/ATPYbBfSFZEIRK0ecCFLl2Qo55z52Hss+UV9CRN7trSeq1zbgx7YDWWg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "csstype": "^3.2.2"
       }
@@ -1736,7 +1732,6 @@
       "integrity": "sha512-3xP4XzzDNQOIqBMWogftkwxhg5oMKApqY0BAflmLZiFYHqyhSOxv/cd/zPQLTcCXr4AkaKb25joocY0BD1WC6A==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@typescript-eslint/scope-manager": "8.51.0",
         "@typescript-eslint/types": "8.51.0",
@@ -2099,7 +2094,6 @@
       "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "acorn": "bin/acorn"
       },
@@ -2232,7 +2226,6 @@
         }
       ],
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "baseline-browser-mapping": "^2.9.0",
         "caniuse-lite": "^1.0.30001759",
@@ -2627,7 +2620,6 @@
       "integrity": "sha512-LEyamqS7W5HB3ujJyvi0HQK/dtVINZvd5mAAp9eT5S/ujByGjiZLCzPcHVzuXbpJDJF/cxwHlfceVUDZ2lnSTw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "@eslint-community/eslint-utils": "^4.8.0",
         "@eslint-community/regexpp": "^4.12.1",
@@ -2688,7 +2680,6 @@
       "integrity": "sha512-iI1f+D2ViGn+uvv5HuHVUamg8ll4tN+JRHGc6IJi4TP9Kl976C57fzPXgseXNs8v0iA8aSJpHsTWjDb9QJamGQ==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "eslint-config-prettier": "bin/cli.js"
       },
@@ -3330,7 +3321,6 @@
       "integrity": "sha512-8i7LzZj7BF8uplX+ZyOlIz86V6TAsSs+np6m1kpW9u0JWi4z/1t+FzcK1aek+ybTnAC4KhBL4uXCNT0wcUIeCw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "cssstyle": "^4.1.0",
         "data-urls": "^5.0.0",
@@ -3700,7 +3690,6 @@
       "integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=12"
       },
@@ -3753,7 +3742,6 @@
       "integrity": "sha512-v6UNi1+3hSlVvv8fSaoUbggEM5VErKmmpGA7Pl3HF8V6uKY7rvClBOJlH6yNwQtfTueNkGVpOv/mtWL9L4bgRA==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "bin": {
         "prettier": "bin/prettier.cjs"
       },
@@ -3792,7 +3780,6 @@
       "resolved": "https://registry.npmjs.org/react/-/react-19.2.3.tgz",
       "integrity": "sha512-Ku/hhYbVjOQnXDZFv2+RibmLFGwFdeeKHFcOTlrt7xplBnya5OGn/hIRDsqDiSUcfORsDC7MPxwork8jBwsIWA==",
       "license": "MIT",
-      "peer": true,
       "engines": {
         "node": ">=0.10.0"
       }
@@ -4145,7 +4132,6 @@
       "integrity": "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw==",
       "dev": true,
       "license": "Apache-2.0",
-      "peer": true,
       "bin": {
         "tsc": "bin/tsc",
         "tsserver": "bin/tsserver"
@@ -4231,7 +4217,6 @@
       "integrity": "sha512-dZwN5L1VlUBewiP6H9s2+B3e3Jg96D0vzN+Ry73sOefebhYr9f94wwkMNN/9ouoU8pV1BqA1d1zGk8928cx0rg==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.27.0",
         "fdir": "^6.5.0",
@@ -4547,7 +4532,6 @@
       "integrity": "sha512-0wZ1IRqGGhMP76gLqz8EyfBXKk0J2qo2+H3fi4mcUP/KtTocoX08nmIAHl1Z2kJIZbZee8KOpBCSNPRgauucjw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "funding": {
         "url": "https://github.com/sponsors/colinhacks"
       }
diff --git a/render.yaml b/render.yaml
index f9f53ca..76f0d4e 100644
--- a/render.yaml
+++ b/render.yaml
@@ -12,8 +12,12 @@ services:
         value: production
       - key: INWORLD_API_KEY
         sync: false
+      - key: STT_PROVIDER
+        value: assembly
       - key: ASSEMBLY_AI_API_KEY
         sync: false
+      - key: SONIOX_API_KEY
+        sync: false
       - key: SUPABASE_URL
         sync: false
       - key: SUPABASE_SECRET_KEY

From 0fddce47510bf91a778c745d33a2714618af4211 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 14:26:01 -0800
Subject: [PATCH 02/16] fix: conversation histories are separate

---
 backend/src/services/websocket-handler.ts | 22 +++++++
 frontend/src/context/AppContext.tsx       | 76 ++++++++++++++++-------
 2 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/backend/src/services/websocket-handler.ts b/backend/src/services/websocket-handler.ts
index e5fee0e..bd3350c 100644
--- a/backend/src/services/websocket-handler.ts
+++ b/backend/src/services/websocket-handler.ts
@@ -294,12 +294,34 @@ function handleConversationUpdate(
   connectionId: string,
   connectionManager: ConnectionManager,
   message: {
+    conversationId?: string;
     data?: {
+      conversationId?: string;
       messages?: Array<{ role: string; content: string; timestamp?: string }>;
     };
     messages?: Array<{ role: string; content: string; timestamp?: string }>;
   }
 ): void {
+  const incomingConversationId =
+    message.conversationId || message.data?.conversationId;
+  const currentConversationId = connectionManager.getConversationId();
+
+  if (
+    incomingConversationId &&
+    currentConversationId &&
+    incomingConversationId !== currentConversationId
+  ) {
+    logger.info(
+      {
+        connectionId,
+        incomingConversationId,
+        currentConversationId,
+      },
+      'ignoring_stale_conversation_update'
+    );
+    return;
+  }
+
   // Handle both formats: { data: { messages: [...] } } and { messages: [...] }
   const messages =
     message.messages ||
diff --git a/frontend/src/context/AppContext.tsx b/frontend/src/context/AppContext.tsx
index 4a20157..f5cea64 100644
--- a/frontend/src/context/AppContext.tsx
+++ b/frontend/src/context/AppContext.tsx
@@ -520,8 +520,6 @@ export function AppProvider({ children }: AppProviderProps) {
 
     // Case 1: We have a pending LLM response but user message was already added (text input case)
     if (pendingLLMResponse && !pendingTranscription) {
-      // Add only the teacher response
-      storage.addMessage('assistant', pendingLLMResponse);
       dispatch({
         type: 'ADD_MESSAGE',
         payload: {
@@ -531,8 +529,24 @@ export function AppProvider({ children }: AppProviderProps) {
         },
       });
 
-      const conversationHistory = storage.getConversationHistory();
-      wsClient.send({ type: 'conversation_update', data: conversationHistory });
+      // Build conversation_update from current chatHistory + new assistant message
+      const messages = [
+        ...currentState.chatHistory.map((m) => ({
+          role: m.role === 'learner' ? 'user' : 'assistant',
+          content: m.content,
+          timestamp: m.timestamp || new Date().toISOString(),
+        })),
+        {
+          role: 'assistant',
+          content: pendingLLMResponse,
+          timestamp: new Date().toISOString(),
+        },
+      ];
+      wsClient.send({
+        type: 'conversation_update',
+        conversationId,
+        messages,
+      });
 
       pendingLLMResponseRef.current = null;
       dispatch({ type: 'RESET_STREAMING_STATE' });
@@ -562,7 +576,6 @@ export function AppProvider({ children }: AppProviderProps) {
         });
       }
 
-      storage.addMessage('user', pendingTranscription);
       dispatch({
         type: 'ADD_MESSAGE',
         payload: {
@@ -572,7 +585,6 @@ export function AppProvider({ children }: AppProviderProps) {
         },
       });
 
-      storage.addMessage('assistant', pendingLLMResponse);
       dispatch({
         type: 'ADD_MESSAGE',
         payload: {
@@ -582,8 +594,29 @@ export function AppProvider({ children }: AppProviderProps) {
         },
       });
 
-      const conversationHistory = storage.getConversationHistory();
-      wsClient.send({ type: 'conversation_update', data: conversationHistory });
+      // Build conversation_update from current chatHistory + new user + assistant messages
+      const messages = [
+        ...currentState.chatHistory.map((m) => ({
+          role: m.role === 'learner' ? 'user' : 'assistant',
+          content: m.content,
+          timestamp: m.timestamp || new Date().toISOString(),
+        })),
+        {
+          role: 'user',
+          content: pendingTranscription,
+          timestamp: new Date().toISOString(),
+        },
+        {
+          role: 'assistant',
+          content: pendingLLMResponse,
+          timestamp: new Date().toISOString(),
+        },
+      ];
+      wsClient.send({
+        type: 'conversation_update',
+        conversationId,
+        messages,
+      });
 
       dispatch({ type: 'SET_PENDING_TRANSCRIPTION', payload: null });
       pendingLLMResponseRef.current = null;
@@ -686,12 +719,16 @@ export function AppProvider({ children }: AppProviderProps) {
       });
 
       if (status === 'connected') {
-        const existingConversation = storage.getConversationHistory();
-        if (existingConversation.messages.length > 0) {
-          wsClient.send({
-            type: 'conversation_update',
-            data: existingConversation,
-          });
+        const currentId = stateRef.current.currentConversationId;
+        if (currentId) {
+          const conversationData = storage.getConversation(currentId);
+          if (conversationData && conversationData.messages.length > 0) {
+            wsClient.send({
+              type: 'conversation_update',
+              conversationId: currentId,
+              messages: conversationData.messages,
+            });
+          }
         }
       }
     });
@@ -965,17 +1002,9 @@ export function AppProvider({ children }: AppProviderProps) {
         })) as ChatMessage[];
 
         // Update chat history to match server state
+        // Per-conversation storage is kept in sync by the useEffect on chatHistory
         dispatch({ type: 'SET_CHAT_HISTORY', payload: chatHistory });
 
-        // Also update storage to stay in sync
-        storage.clearConversation();
-        messages.forEach((m) => {
-          storage.addMessage(
-            m.role === 'user' ? 'user' : 'assistant',
-            m.content
-          );
-        });
-
         // Clear any pending state
         dispatch({ type: 'SET_PENDING_TRANSCRIPTION', payload: null });
         pendingLLMResponseRef.current = null;
@@ -1263,7 +1292,6 @@ export function AppProvider({ children }: AppProviderProps) {
       }
 
       // Add user message to chat history immediately (unlike audio where we wait for transcription)
-      storage.addMessage('user', trimmedText);
       dispatch({
         type: 'ADD_MESSAGE',
         payload: {

From 95acafe776ff08e10914d4d1d8ab41ec7856c0b9 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 14:31:32 -0800
Subject: [PATCH 03/16] feat: add dynamic language hints to soniox

---
 backend/src/config/server.ts                  |  4 ---
 backend/src/graphs/conversation-graph.ts      |  1 -
 .../src/graphs/nodes/soniox-stt-ws-node.ts    | 29 +++++++++++++++++--
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/backend/src/config/server.ts b/backend/src/config/server.ts
index dd71935..4e6f9d3 100644
--- a/backend/src/config/server.ts
+++ b/backend/src/config/server.ts
@@ -63,7 +63,6 @@ const assemblyAIPresets: Record<
 
 export interface SonioxEndpointSettings {
   maxEndpointDelayMs: number;
-  languageHints: string[];
   description: string;
 }
 
@@ -75,17 +74,14 @@ export interface SonioxEndpointSettings {
 const sonioxPresets: Record<AssemblyAIEagerness, SonioxEndpointSettings> = {
   high: {
     maxEndpointDelayMs: 500,
-    languageHints: ['en', 'es'],
     description: 'Aggressive - fastest endpoint detection (500ms)',
   },
   medium: {
     maxEndpointDelayMs: 1000,
-    languageHints: ['en', 'es'],
     description: 'Balanced - moderate endpoint delay (1000ms)',
   },
   low: {
     maxEndpointDelayMs: 2000,
-    languageHints: ['en', 'es'],
     description: 'Conservative - patient endpoint detection (2000ms)',
   },
 };
diff --git a/backend/src/graphs/conversation-graph.ts b/backend/src/graphs/conversation-graph.ts
index 938392f..fdd627d 100644
--- a/backend/src/graphs/conversation-graph.ts
+++ b/backend/src/graphs/conversation-graph.ts
@@ -116,7 +116,6 @@ export class ConversationGraphWrapper {
           connections: connections,
           sampleRate: serverConfig.audio.inputSampleRate,
           maxEndpointDelayMs: sonioxSettings.maxEndpointDelayMs,
-          languageHints: sonioxSettings.languageHints,
         },
       });
     } else {
diff --git a/backend/src/graphs/nodes/soniox-stt-ws-node.ts b/backend/src/graphs/nodes/soniox-stt-ws-node.ts
index e78ce48..afe231c 100644
--- a/backend/src/graphs/nodes/soniox-stt-ws-node.ts
+++ b/backend/src/graphs/nodes/soniox-stt-ws-node.ts
@@ -180,6 +180,23 @@ class SonioxSession {
     }
   }
 
+  /**
+   * Update language hints. If they differ from the current hints, closes the
+   * existing WebSocket so the next ensureConnection() reopens with the new config.
+   */
+  public updateLanguageHints(hints: string[]): void {
+    const sorted = [...hints].sort();
+    const currentSorted = [...this.languageHints].sort();
+    if (sorted.join(',') === currentSorted.join(',')) return;
+
+    logger.info(
+      { sessionId: this.sessionId, from: this.languageHints, to: hints },
+      'language_hints_changed'
+    );
+    this.languageHints = hints;
+    this.closeWebSocket();
+  }
+
   private closeDueToInactivity(): void {
     const inactiveFor = Date.now() - this.lastActivityTime;
     logger.info(
@@ -267,7 +284,7 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
     this.connections = config.connections;
     this.sampleRate = config.sampleRate || 16000;
     this.maxEndpointDelayMs = config.maxEndpointDelayMs ?? 2000;
-    this.languageHints = config.languageHints ?? [];
+    this.languageHints = config.languageHints ?? ['en'];
 
     logger.info(
       {
@@ -346,6 +363,12 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
     // Soniox token accumulation
     let finalTokenTexts: string[] = [];
 
+    // Derive per-session language hints from the connection's active language
+    const targetLang = connection.state.languageCode || 'es';
+    const sessionLanguageHints = targetLang === 'en'
+      ? ['en']
+      : ['en', targetLang];
+
     // Get or create session
     let session = this.sessions.get(sessionId);
     if (!session) {
@@ -354,9 +377,11 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
         this.apiKey,
         this.sampleRate,
         this.maxEndpointDelayMs,
-        this.languageHints
+        sessionLanguageHints
       );
       this.sessions.set(sessionId, session);
+    } else {
+      session.updateLanguageHints(sessionLanguageHints);
     }
 
     // Promise to capture turn result

From 31270473f37c43ef5b5d123305799339ca711dad Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 14:49:55 -0800
Subject: [PATCH 04/16] feat: add support for Chinese, Japanese, Korean, and
 Russian

---
 .../src/__tests__/config/languages.test.ts    |  21 ++-
 backend/src/config/languages.ts               | 156 ++++++++++++++++--
 backend/src/config/server.ts                  |   8 +
 backend/src/graphs/simple-tts-graph.ts        |   4 +-
 backend/src/services/api-routes.ts            |   3 +-
 backend/src/services/websocket-handler.ts     |   6 +-
 6 files changed, 179 insertions(+), 19 deletions(-)

diff --git a/backend/src/__tests__/config/languages.test.ts b/backend/src/__tests__/config/languages.test.ts
index 426d959..59f4933 100644
--- a/backend/src/__tests__/config/languages.test.ts
+++ b/backend/src/__tests__/config/languages.test.ts
@@ -74,13 +74,30 @@ describe('languages config', () => {
       expect(codes).toContain('de');
     });
 
-    it('matches SUPPORTED_LANGUAGES keys', () => {
+    it('without provider, returns only languages without requiredSttProvider', () => {
       const codes = getSupportedLanguageCodes();
-      expect(codes.length).toBe(Object.keys(SUPPORTED_LANGUAGES).length);
       for (const code of codes) {
         expect(SUPPORTED_LANGUAGES[code]).toBeDefined();
+        expect(SUPPORTED_LANGUAGES[code].requiredSttProvider).toBeUndefined();
       }
     });
+
+    it('with soniox provider, returns all languages', () => {
+      const codes = getSupportedLanguageCodes('soniox');
+      expect(codes.length).toBe(Object.keys(SUPPORTED_LANGUAGES).length);
+      expect(codes).toContain('zh');
+      expect(codes).toContain('ja');
+      expect(codes).toContain('ko');
+      expect(codes).toContain('ru');
+    });
+
+    it('with assembly provider, excludes soniox-only languages', () => {
+      const codes = getSupportedLanguageCodes('assembly');
+      expect(codes).not.toContain('zh');
+      expect(codes).not.toContain('ja');
+      expect(codes).not.toContain('ko');
+      expect(codes).not.toContain('ru');
+    });
   });
 
   describe('getLanguageOptions', () => {
diff --git a/backend/src/config/languages.ts b/backend/src/config/languages.ts
index f6d43ad..ec20d1d 100644
--- a/backend/src/config/languages.ts
+++ b/backend/src/config/languages.ts
@@ -8,6 +8,7 @@
  */
 
 import { createLogger } from '../utils/logger.js';
+import type { STTProvider } from './server.js';
 
 const logger = createLogger('Languages');
 
@@ -46,6 +47,9 @@ export interface LanguageConfig {
 
   // Example conversation topics specific to this language's culture
   exampleTopics: string[];
+
+  // If set, this language is only available when the given STT provider is active
+  requiredSttProvider?: STTProvider;
 }
 
 /**
@@ -235,6 +239,126 @@ export const SUPPORTED_LANGUAGES: Record<string, LanguageConfig> = {
       'the Amazon and Brazilian nature',
     ],
   },
+
+  zh: {
+    code: 'zh',
+    name: 'Chinese',
+    nativeName: '中文',
+    flag: '🇨🇳',
+    sttLanguageCode: 'zh-CN',
+    ttsConfig: {
+      speakerId: 'Xiaoyin',
+      modelId: 'inworld-tts-1.5-max',
+      speakingRate: 1,
+      temperature: 1.1,
+      languageCode: 'zh-CN',
+    },
+    teacherPersona: {
+      name: '李老师 (Lǐ Lǎoshī)',
+      age: 33,
+      nationality: 'Chinese (Beijing)',
+      description:
+        'a 33 year old Beijinger who loves teaching Mandarin through Chinese culture, food, and modern life',
+    },
+    exampleTopics: [
+      'life in Beijing and Shanghai',
+      'Chinese cuisine and regional flavors',
+      'Chinese festivals and traditions',
+      'modern Chinese pop culture',
+      'travel along the Silk Road',
+    ],
+    requiredSttProvider: 'soniox',
+  },
+
+  ja: {
+    code: 'ja',
+    name: 'Japanese',
+    nativeName: '日本語',
+    flag: '🇯🇵',
+    sttLanguageCode: 'ja-JP',
+    ttsConfig: {
+      speakerId: 'Asuka',
+      modelId: 'inworld-tts-1.5-max',
+      speakingRate: 1,
+      temperature: 1.1,
+      languageCode: 'ja-JP',
+    },
+    teacherPersona: {
+      name: '田中先生 (Tanaka-sensei)',
+      age: 31,
+      nationality: 'Japanese (Tokyo)',
+      description:
+        'a 31 year old Tokyoite who is passionate about teaching Japanese through anime, food, and everyday life',
+    },
+    exampleTopics: [
+      'daily life in Tokyo',
+      'Japanese cuisine from ramen to kaiseki',
+      'anime and manga culture',
+      'Japanese seasons and festivals',
+      'travel through Kyoto and rural Japan',
+    ],
+    requiredSttProvider: 'soniox',
+  },
+
+  ko: {
+    code: 'ko',
+    name: 'Korean',
+    nativeName: '한국어',
+    flag: '🇰🇷',
+    sttLanguageCode: 'ko-KR',
+    ttsConfig: {
+      speakerId: 'Seojun',
+      modelId: 'inworld-tts-1.5-max',
+      speakingRate: 1,
+      temperature: 1.1,
+      languageCode: 'ko-KR',
+    },
+    teacherPersona: {
+      name: '김선생님 (Kim Seonsaengnim)',
+      age: 29,
+      nationality: 'Korean (Seoul)',
+      description:
+        'a 29 year old Seoulite who enjoys teaching Korean through K-pop, K-drama, and Korean street food culture',
+    },
+    exampleTopics: [
+      'life in Seoul and Busan',
+      'Korean food and street food culture',
+      'K-pop and K-drama',
+      'Korean traditions and holidays',
+      'travel through South Korea',
+    ],
+    requiredSttProvider: 'soniox',
+  },
+
+  ru: {
+    code: 'ru',
+    name: 'Russian',
+    nativeName: 'Русский',
+    flag: '🇷🇺',
+    sttLanguageCode: 'ru-RU',
+    ttsConfig: {
+      speakerId: 'Elena',
+      modelId: 'inworld-tts-1.5-max',
+      speakingRate: 1,
+      temperature: 1.1,
+      languageCode: 'ru-RU',
+    },
+    teacherPersona: {
+      name: 'Елена Петровна (Elena Petrovna)',
+      age: 37,
+      nationality: 'Russian (Moscow)',
+      description:
+        'a 37 year old Muscovite who loves teaching Russian through literature, history, and the richness of Russian culture',
+    },
+    exampleTopics: [
+      'life in Moscow and Saint Petersburg',
+      'Russian literature and poetry',
+      'Russian cuisine and tea culture',
+      'Russian music from classical to modern',
+      'the Trans-Siberian Railway and Russian nature',
+    ],
+    requiredSttProvider: 'soniox',
+  },
 };
 
 /**
@@ -255,27 +379,37 @@ export function getLanguageConfig(code: string): LanguageConfig {
 }
 
 /**
- * Get all supported language codes
+ * Get all supported language codes, optionally filtered by STT provider
  */
-export function getSupportedLanguageCodes(): string[] {
-  return Object.keys(SUPPORTED_LANGUAGES);
+export function getSupportedLanguageCodes(sttProvider?: STTProvider): string[] {
+  return Object.values(SUPPORTED_LANGUAGES)
+    .filter(
+      (lang) =>
+        !lang.requiredSttProvider || lang.requiredSttProvider === sttProvider
+    )
+    .map((lang) => lang.code);
 }
 
 /**
- * Get language options for frontend dropdown
+ * Get language options for frontend dropdown, optionally filtered by STT provider
  */
-export function getLanguageOptions(): Array<{
+export function getLanguageOptions(sttProvider?: STTProvider): Array<{
   code: string;
   name: string;
   nativeName: string;
   flag: string;
 }> {
-  return Object.values(SUPPORTED_LANGUAGES).map((lang) => ({
-    code: lang.code,
-    name: lang.name,
-    nativeName: lang.nativeName,
-    flag: lang.flag,
-  }));
+  return Object.values(SUPPORTED_LANGUAGES)
+    .filter(
+      (lang) =>
+        !lang.requiredSttProvider || lang.requiredSttProvider === sttProvider
+    )
+    .map((lang) => ({
+      code: lang.code,
+      name: lang.name,
+      nativeName: lang.nativeName,
+      flag: lang.flag,
+    }));
 }
 
 /**
diff --git a/backend/src/config/server.ts b/backend/src/config/server.ts
index 4e6f9d3..dbb36fd 100644
--- a/backend/src/config/server.ts
+++ b/backend/src/config/server.ts
@@ -153,6 +153,14 @@ export function getAssemblyAISettingsForEagerness(
   return assemblyAIPresets[eagerness];
 }
 
+/**
+ * Get the active STT provider at call time (after dotenv loads).
+ * Do NOT use serverConfig.sttProvider — it is evaluated at module load time before dotenv.
+ */
+export function getSttProvider(): STTProvider {
+  return (process.env.STT_PROVIDER || 'assembly') as STTProvider;
+}
+
 /**
  * Get Soniox endpoint detection settings for the configured eagerness level.
  * Reads SONIOX_EAGERNESS from process.env at call time (after dotenv loads).
diff --git a/backend/src/graphs/simple-tts-graph.ts b/backend/src/graphs/simple-tts-graph.ts
index 2b7dd57..02877cb 100644
--- a/backend/src/graphs/simple-tts-graph.ts
+++ b/backend/src/graphs/simple-tts-graph.ts
@@ -22,7 +22,7 @@ import {
   getLanguageConfig,
   getSupportedLanguageCodes,
 } from '../config/languages.js';
-import { serverConfig } from '../config/server.js';
+import { serverConfig, getSttProvider } from '../config/server.js';
 import { graphLogger as logger } from '../utils/logger.js';
 
 export interface SimpleTTSInput {
@@ -93,7 +93,7 @@ const simpleTTSGraphs = new Map<string, Graph>();
  * Initialize TTS graphs for all supported languages
  */
 export function initializeTTSGraphs(): void {
-  const languageCodes = getSupportedLanguageCodes();
+  const languageCodes = getSupportedLanguageCodes(getSttProvider());
 
   logger.info(
     { languageCount: languageCodes.length },
diff --git a/backend/src/services/api-routes.ts b/backend/src/services/api-routes.ts
index ab481bb..dccac71 100644
--- a/backend/src/services/api-routes.ts
+++ b/backend/src/services/api-routes.ts
@@ -10,6 +10,7 @@ import {
   getLanguageOptions,
   DEFAULT_LANGUAGE_CODE,
 } from '../config/languages.js';
+import { getSttProvider } from '../config/server.js';
 import { serverLogger as logger } from '../utils/logger.js';
 
 export const apiRouter = Router();
@@ -53,7 +54,7 @@ apiRouter.post('/export-anki', async (req, res) => {
 // Languages endpoint
 apiRouter.get('/languages', (_req, res) => {
   try {
-    const languages = getLanguageOptions();
+    const languages = getLanguageOptions(getSttProvider());
     res.json({ languages, defaultLanguage: DEFAULT_LANGUAGE_CODE });
   } catch (error) {
     logger.error({ err: error }, 'get_languages_error');
diff --git a/backend/src/services/websocket-handler.ts b/backend/src/services/websocket-handler.ts
index bd3350c..6115d6c 100644
--- a/backend/src/services/websocket-handler.ts
+++ b/backend/src/services/websocket-handler.ts
@@ -19,7 +19,7 @@ import {
 } from '../config/languages.js';
 import { serverLogger as logger } from '../utils/logger.js';
 import { getSimpleTTSGraph } from '../graphs/simple-tts-graph.js';
-import { serverConfig } from '../config/server.js';
+import { serverConfig, getSttProvider } from '../config/server.js';
 
 import {
   connections,
@@ -411,7 +411,7 @@ async function handleConversationSwitch(
   }
 
   // Validate language code
-  const supportedCodes = getSupportedLanguageCodes();
+  const supportedCodes = getSupportedLanguageCodes(getSttProvider());
   const languageCode = supportedCodes.includes(requestedLanguageCode)
     ? requestedLanguageCode
     : DEFAULT_LANGUAGE_CODE;
@@ -518,7 +518,7 @@ function handleUserContext(
   const currentAttrs = connectionAttributes.get(connectionId) || {};
 
   // Validate language code
-  const supportedCodes = getSupportedLanguageCodes();
+  const supportedCodes = getSupportedLanguageCodes(getSttProvider());
   const validatedLanguageCode =
     languageCode && supportedCodes.includes(languageCode)
       ? languageCode

From fd3220b65b5d83732a8cfa82bb37bdbfcbe4aaca Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 17:11:12 -0800
Subject: [PATCH 05/16] feat: add flashcard translated sentences, pinyin for
 zh, and audio for sentences

---
 backend/src/graphs/flashcard-graph.ts         |  6 +-
 backend/src/helpers/anki-exporter.ts          | 15 +++-
 backend/src/helpers/flashcard-processor.ts    |  4 +
 backend/src/prompts/flashcard.njk             |  8 +-
 frontend/src/components/Flashcard.tsx         | 39 +++++++++-
 frontend/src/components/FlashcardsSection.tsx | 15 ++++
 frontend/src/services/SupabaseStorage.ts      | 12 +++
 frontend/src/styles/main.css                  | 76 +++++++++++++++++++
 frontend/src/types/index.ts                   |  3 +
 .../20240108000000_initial_schema.sql         |  3 +
 10 files changed, 176 insertions(+), 5 deletions(-)

diff --git a/backend/src/graphs/flashcard-graph.ts b/backend/src/graphs/flashcard-graph.ts
index 6f8b108..cec41c2 100644
--- a/backend/src/graphs/flashcard-graph.ts
+++ b/backend/src/graphs/flashcard-graph.ts
@@ -51,7 +51,7 @@ class FlashcardParserNode extends CustomNode {
       const jsonMatch = textContent.match(/\{[\s\S]*\}/);
       if (jsonMatch) {
         const parsed = JSON.parse(jsonMatch[0]);
-        return {
+        const result: Record<string, unknown> = {
           id: v4(),
           // Support both new 'targetWord' format and legacy 'spanish' format
           targetWord: parsed.targetWord ?? parsed.spanish ?? '',
@@ -60,6 +60,10 @@ class FlashcardParserNode extends CustomNode {
           mnemonic: parsed.mnemonic ?? '',
           timestamp: new Date().toISOString(),
         };
+        if (parsed.exampleTranslation) result.exampleTranslation = parsed.exampleTranslation;
+        if (parsed.pinyin) result.pinyin = parsed.pinyin;
+        if (parsed.examplePinyin) result.examplePinyin = parsed.examplePinyin;
+        return result;
       }
     } catch (error) {
       logger.error({ err: error }, 'failed_to_parse_flashcard_json');
diff --git a/backend/src/helpers/anki-exporter.ts b/backend/src/helpers/anki-exporter.ts
index 710eeb8..793a3be 100644
--- a/backend/src/helpers/anki-exporter.ts
+++ b/backend/src/helpers/anki-exporter.ts
@@ -30,7 +30,10 @@ export class AnkiExporter {
         return;
       }
 
-      const front = targetWord.trim();
+      let front = targetWord.trim();
+      if (flashcard.pinyin) {
+        front += `<br><span style="font-size: 14px; color: #888;">${this.escapeHtml(flashcard.pinyin)}</span>`;
+      }
       const back = this.formatCardBack(flashcard);
 
       // Add tags for organization
@@ -61,7 +64,15 @@ export class AnkiExporter {
     let back = `<div style="font-size: 18px; margin-bottom: 10px;">${this.escapeHtml(flashcard.english)}</div>`;
 
     if (flashcard.example && flashcard.example.trim()) {
-      back += `<div style="font-size: 14px; color: #666; font-style: italic; margin: 10px 0; padding: 8px; background-color: #f5f5f5; border-left: 3px solid #2196F3;">${this.escapeHtml(flashcard.example)}</div>`;
+      let exampleHtml = this.escapeHtml(flashcard.example);
+      if (flashcard.examplePinyin) {
+        exampleHtml += `<br><span style="font-size: 12px; color: #999;">${this.escapeHtml(flashcard.examplePinyin)}</span>`;
+      }
+      back += `<div style="font-size: 14px; color: #666; font-style: italic; margin: 10px 0; padding: 8px; background-color: #f5f5f5; border-left: 3px solid #2196F3;">${exampleHtml}</div>`;
+    }
+
+    if (flashcard.exampleTranslation && flashcard.exampleTranslation.trim()) {
+      back += `<div style="font-size: 13px; color: #888; margin: -4px 0 10px 0; padding: 0 8px;">${this.escapeHtml(flashcard.exampleTranslation)}</div>`;
     }
 
     if (flashcard.mnemonic && flashcard.mnemonic.trim()) {
diff --git a/backend/src/helpers/flashcard-processor.ts b/backend/src/helpers/flashcard-processor.ts
index e7aa240..2b48628 100644
--- a/backend/src/helpers/flashcard-processor.ts
+++ b/backend/src/helpers/flashcard-processor.ts
@@ -15,7 +15,10 @@ export interface Flashcard {
   targetWord: string; // The word in the target language (was 'spanish')
   english: string;
   example: string;
+  exampleTranslation?: string;
   mnemonic: string;
+  pinyin?: string;
+  examplePinyin?: string;
   timestamp: string;
   languageCode?: string; // Track which language this card belongs to
 }
@@ -118,6 +121,7 @@ export class FlashcardProcessor {
         studentName: 'Student',
         teacherName: effectiveLanguageConfig.teacherPersona.name,
         target_language: effectiveLanguageConfig.name,
+        language_code: effectiveLanguageCode,
         messages: messages,
         flashcards: this.existingFlashcards,
       };
diff --git a/backend/src/prompts/flashcard.njk b/backend/src/prompts/flashcard.njk
index 82c7c27..79c3ac1 100644
--- a/backend/src/prompts/flashcard.njk
+++ b/backend/src/prompts/flashcard.njk
@@ -5,7 +5,9 @@ Based on the ongoing conversation between {{studentName}} and {{teacherName}}, g
 - The word in {{target_language}}
 - The translation in English
 - An example sentence in {{target_language}}
+- An English translation of the example sentence
 - A mnemonic to help the student remember the word (in English)
+{% if language_code == "zh" %}- The pinyin romanization for both the word and the example sentence{% endif %}
 
 ## Conversation
 
@@ -23,6 +25,7 @@ Based on the ongoing conversation between {{studentName}} and {{teacherName}}, g
 - The word must be related to the topics used in the conversation
 - The word should be useful to the learner so they can continue the conversation with new vocabulary
 - Avoid cognates
+{% if language_code == "zh" %}- Include accurate pinyin with tone marks (e.g. "nǐ hǎo") for the word and the example sentence{% endif %}
 
 Now, return JSON with the following format:
 
@@ -30,5 +33,8 @@ Now, return JSON with the following format:
   "targetWord": "string",
   "english": "string",
   "example": "string",
-  "mnemonic": "string"
+  "exampleTranslation": "string (English translation of the example sentence)",
+{% if language_code == "zh" %}  "pinyin": "string (pinyin for the word)",
+  "examplePinyin": "string (pinyin for the example sentence)",
+{% endif %}  "mnemonic": "string"
 }
\ No newline at end of file
diff --git a/frontend/src/components/Flashcard.tsx b/frontend/src/components/Flashcard.tsx
index 84bb9a3..d5e4726 100644
--- a/frontend/src/components/Flashcard.tsx
+++ b/frontend/src/components/Flashcard.tsx
@@ -5,7 +5,9 @@ interface FlashcardProps {
   flashcard: FlashcardType;
   onCardClick?: (flashcard: FlashcardType) => void;
   onPronounce?: (flashcard: FlashcardType) => void;
+  onPronounceText?: (text: string) => void;
   isPronouncing?: boolean;
+  isPronouncingSentence?: boolean;
 }
 
 function capitalizeFirstLetter(text: string): string {
@@ -17,7 +19,9 @@ export function Flashcard({
   flashcard,
   onCardClick,
   onPronounce,
+  onPronounceText,
   isPronouncing = false,
+  isPronouncingSentence = false,
 }: FlashcardProps) {
   const [isFlipped, setIsFlipped] = useState(false);
 
@@ -34,12 +38,26 @@ export function Flashcard({
     [flashcard, onPronounce]
   );
 
+  const handlePronounceExample = useCallback(
+    (e: React.MouseEvent) => {
+      e.stopPropagation();
+      const text = flashcard.example || flashcard.example_sentence || '';
+      if (text && onPronounceText) {
+        onPronounceText(text);
+      }
+    },
+    [flashcard, onPronounceText]
+  );
+
   // Support both new 'targetWord' and legacy 'spanish' field
   const targetWord =
     flashcard.targetWord || flashcard.spanish || flashcard.word || '';
   const english = flashcard.english || flashcard.translation || '';
   const example = flashcard.example || flashcard.example_sentence || '';
+  const exampleTranslation = flashcard.exampleTranslation || '';
   const mnemonic = flashcard.mnemonic || '';
+  const pinyin = flashcard.pinyin || '';
+  const examplePinyin = flashcard.examplePinyin || '';
 
   // Capitalize the first letter of the target word for display
   const displayTargetWord = capitalizeFirstLetter(targetWord);
@@ -52,6 +70,7 @@ export function Flashcard({
       <div className="flashcard-inner">
         <div className="flashcard-front">
           <div className="flashcard-target-word">{displayTargetWord}</div>
+          {pinyin && <div className="flashcard-pinyin">{pinyin}</div>}
           <button
             className={`pronounce-button ${isPronouncing ? 'loading' : ''}`}
             onClick={handlePronounce}
@@ -78,7 +97,25 @@ export function Flashcard({
         </div>
         <div className="flashcard-back">
           <div className="flashcard-english">{english}</div>
-          <div className="flashcard-example">{example}</div>
+          <div
+            className={`flashcard-example ${onPronounceText ? 'pronounceable' : ''} ${isPronouncingSentence ? 'pronouncing' : ''}`}
+            onClick={onPronounceText ? handlePronounceExample : undefined}
+            role={onPronounceText ? 'button' : undefined}
+            aria-label={onPronounceText ? 'Pronounce example sentence' : undefined}
+          >
+            <span>{example}</span>
+            {onPronounceText && (
+              <svg className="example-speaker-icon" viewBox="0 0 24 24" fill="currentColor">
+                <path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02z" />
+              </svg>
+            )}
+          </div>
+          {examplePinyin && (
+            <div className="flashcard-example-pinyin">{examplePinyin}</div>
+          )}
+          {exampleTranslation && (
+            <div className="flashcard-example-translation">{exampleTranslation}</div>
+          )}
           {mnemonic && (
             <div className="flashcard-mnemonic">
               <span className="mnemonic-label">Remember:</span>{' '}
diff --git a/frontend/src/components/FlashcardsSection.tsx b/frontend/src/components/FlashcardsSection.tsx
index e7c5145..8ac69d2 100644
--- a/frontend/src/components/FlashcardsSection.tsx
+++ b/frontend/src/components/FlashcardsSection.tsx
@@ -31,6 +31,14 @@ export function FlashcardsSection() {
     [pronounceWord]
   );
 
+  const handlePronounceText = useCallback(
+    (text: string) => {
+      if (!text) return;
+      pronounceWord(text);
+    },
+    [pronounceWord]
+  );
+
   const exportToAnki = useCallback(async () => {
     const validFlashcards = flashcards.filter((flashcard) => {
       const targetWord = flashcard.targetWord || flashcard.spanish;
@@ -138,7 +146,14 @@ export function FlashcardsSection() {
                   flashcard={flashcard}
                   onCardClick={handleCardClick}
                   onPronounce={handlePronounce}
+                  onPronounceText={handlePronounceText}
                   isPronouncing={pronouncingCardId === cardId}
+                  isPronouncingSentence={
+                    pronouncingCardId ===
+                    (flashcard.example ||
+                      flashcard.example_sentence ||
+                      '')
+                  }
                 />
               );
             })
diff --git a/frontend/src/services/SupabaseStorage.ts b/frontend/src/services/SupabaseStorage.ts
index 0a26dd5..b031b85 100644
--- a/frontend/src/services/SupabaseStorage.ts
+++ b/frontend/src/services/SupabaseStorage.ts
@@ -212,7 +212,10 @@ export class SupabaseStorage {
       targetWord: f.target_word,
       english: f.english,
       example: f.example,
+      exampleTranslation: f.example_translation ?? undefined,
       mnemonic: f.mnemonic,
+      pinyin: f.pinyin ?? undefined,
+      examplePinyin: f.example_pinyin ?? undefined,
       timestamp: f.created_at,
       languageCode: f.language_code,
     }));
@@ -229,7 +232,10 @@ export class SupabaseStorage {
       target_word: f.targetWord || f.spanish || '',
       english: f.english,
       example: f.example,
+      example_translation: f.exampleTranslation || null,
       mnemonic: f.mnemonic,
+      pinyin: f.pinyin || null,
+      example_pinyin: f.examplePinyin || null,
     }));
 
     // Use upsert to handle duplicates gracefully
@@ -262,7 +268,10 @@ export class SupabaseStorage {
       targetWord: f.target_word,
       english: f.english,
       example: f.example,
+      exampleTranslation: f.example_translation ?? undefined,
       mnemonic: f.mnemonic,
+      pinyin: f.pinyin ?? undefined,
+      examplePinyin: f.example_pinyin ?? undefined,
       timestamp: f.created_at,
       languageCode: f.language_code,
       conversationId: f.conversation_id,
@@ -282,7 +291,10 @@ export class SupabaseStorage {
       target_word: f.targetWord || f.spanish || '',
       english: f.english,
       example: f.example,
+      example_translation: f.exampleTranslation || null,
       mnemonic: f.mnemonic,
+      pinyin: f.pinyin || null,
+      example_pinyin: f.examplePinyin || null,
     }));
 
     // Use upsert to handle duplicates gracefully
diff --git a/frontend/src/styles/main.css b/frontend/src/styles/main.css
index ff35019..6354d96 100644
--- a/frontend/src/styles/main.css
+++ b/frontend/src/styles/main.css
@@ -1479,6 +1479,15 @@ body {
     padding: 0 8px;
 }
 
+.flashcard-pinyin {
+    font-size: 18px;
+    font-weight: 400;
+    color: #6b7280;
+    text-align: center;
+    margin-top: 4px;
+    letter-spacing: 0.5px;
+}
+
 .flashcard-english {
     font-size: 24px;
     font-weight: 600;
@@ -1501,6 +1510,64 @@ body {
     overflow-wrap: break-word;
     hyphens: auto;
     flex-shrink: 0;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    gap: 6px;
+}
+
+.flashcard-example.pronounceable {
+    cursor: pointer;
+    border-radius: 8px;
+    padding: 6px 10px;
+    transition: background-color 0.2s ease, color 0.2s ease;
+}
+
+.flashcard-example.pronounceable:hover {
+    background-color: rgba(59, 130, 246, 0.08);
+    color: #2563eb;
+}
+
+.flashcard-example.pronounceable:active {
+    background-color: rgba(59, 130, 246, 0.15);
+}
+
+.flashcard-example.pronouncing {
+    color: #2563eb;
+    background-color: rgba(59, 130, 246, 0.08);
+}
+
+.example-speaker-icon {
+    width: 16px;
+    height: 16px;
+    flex-shrink: 0;
+    opacity: 0.4;
+    transition: opacity 0.2s ease;
+}
+
+.flashcard-example.pronounceable:hover .example-speaker-icon {
+    opacity: 0.8;
+}
+
+.flashcard-example.pronouncing .example-speaker-icon {
+    opacity: 1;
+}
+
+.flashcard-example-pinyin {
+    font-size: 13px;
+    color: #9ca3af;
+    text-align: center;
+    margin-bottom: 4px;
+    margin-top: -10px;
+    letter-spacing: 0.3px;
+}
+
+.flashcard-example-translation {
+    font-size: 13px;
+    color: #9ca3af;
+    text-align: center;
+    margin-bottom: 12px;
+    line-height: 1.4;
 }
 
 .flashcard-mnemonic {
@@ -1550,6 +1617,10 @@ body {
     .flashcard-target-word {
         font-size: 24px;
     }
+
+    .flashcard-pinyin {
+        font-size: 14px;
+    }
     
     .flashcard-english {
         font-size: 18px;
@@ -1559,6 +1630,11 @@ body {
         font-size: 13px;
         padding: 0 8px;
     }
+
+    .flashcard-example-pinyin,
+    .flashcard-example-translation {
+        font-size: 11px;
+    }
     
     .flashcard-mnemonic {
         font-size: 12px;
diff --git a/frontend/src/types/index.ts b/frontend/src/types/index.ts
index 99eb49a..e61b492 100644
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@@ -53,7 +53,10 @@ export interface Flashcard {
   targetWord: string;
   english: string;
   example: string;
+  exampleTranslation?: string;
   mnemonic: string;
+  pinyin?: string;
+  examplePinyin?: string;
   timestamp?: string;
   languageCode?: string;
   conversationId?: string;
diff --git a/supabase/migrations/20240108000000_initial_schema.sql b/supabase/migrations/20240108000000_initial_schema.sql
index 2913abc..d52dd98 100644
--- a/supabase/migrations/20240108000000_initial_schema.sql
+++ b/supabase/migrations/20240108000000_initial_schema.sql
@@ -51,7 +51,10 @@ create table public.flashcards (
   target_word text not null,
   english text not null,
   example text,
+  example_translation text,
   mnemonic text,
+  pinyin text,
+  example_pinyin text,
   created_at timestamptz default now() not null,
   unique(user_id, conversation_id, target_word)
 );

From 9386a08f6d3daeb7f4d97edc12c1d8c06d5fc279 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 17:40:32 -0800
Subject: [PATCH 06/16] feat: create flashcards manually by highlighting
 words from the tutor

---
 backend/src/helpers/connection-manager.ts  | 33 +++++---
 backend/src/helpers/flashcard-processor.ts | 14 +++-
 backend/src/prompts/flashcard.njk          |  9 ++
 backend/src/services/websocket-handler.ts  | 96 +++++++++++++++++++++-
 frontend/src/components/ChatSection.tsx    | 62 +++++++++++++-
 frontend/src/context/AppContext.tsx        | 18 ++++
 frontend/src/styles/main.css               | 42 ++++++++++
 7 files changed, 257 insertions(+), 17 deletions(-)

diff --git a/backend/src/helpers/connection-manager.ts b/backend/src/helpers/connection-manager.ts
index 0a8f99c..e59fa72 100644
--- a/backend/src/helpers/connection-manager.ts
+++ b/backend/src/helpers/connection-manager.ts
@@ -465,9 +465,7 @@ export class ConnectionManager {
             }
           }
 
-          // Only send completion signals if not interrupted
           if (!wasInterrupted) {
-            // Send completion signals
             if (!this.isSwitchingConversation) {
               this.logger.debug('tts_stream_complete');
               this.sendToClient({
@@ -476,7 +474,6 @@ export class ConnectionManager {
                 timestamp: Date.now(),
               });
 
-              // Send conversation update with conversationId
               this.sendToClient({
                 type: 'conversation_update',
                 messages: connection.state.messages,
@@ -484,14 +481,17 @@ export class ConnectionManager {
                 timestamp: Date.now(),
               });
             }
-
-            // Trigger flashcard, feedback, and memory generation after TTS completes
-            this.triggerFlashcardGeneration();
-            this.triggerFeedbackGeneration();
-            this.triggerMemoryGeneration();
           } else {
-            this.logger.debug('tts_interrupted_skipping_completion');
+            this.logger.debug('tts_interrupted_skipping_audio_completion');
           }
+
+          // Always trigger flashcard/feedback/memory generation even if TTS was
+          // interrupted — the conversation content is still valid. Messages may
+          // have been rolled back for utterance stitching, but the remaining
+          // history still provides useful context for flashcard generation.
+          this.triggerFlashcardGeneration();
+          this.triggerFeedbackGeneration();
+          this.triggerMemoryGeneration();
           this.markProcessingComplete();
         },
 
@@ -683,7 +683,10 @@ export class ConnectionManager {
    * Trigger flashcard generation
    */
   private triggerFlashcardGeneration(): void {
-    if (!this.flashcardCallback) return;
+    if (!this.flashcardCallback) {
+      this.logger.debug('skipping_flashcard_no_callback');
+      return;
+    }
     if (this.conversationId !== this.processingConversationId) {
       this.logger.info('skipping_flashcard_generation_conversation_changed');
       return;
@@ -704,6 +707,16 @@ export class ConnectionManager {
         content: m.content,
       }));
 
+    if (recentMessages.length === 0) {
+      this.logger.info('skipping_flashcard_no_messages');
+      return;
+    }
+
+    this.logger.info(
+      { messageCount: recentMessages.length, language: snapshotLanguageCode },
+      'triggering_flashcard_generation'
+    );
+
     // Track pending flashcard generation
     this.pendingFlashcardGeneration = this.flashcardCallback(
       recentMessages,
diff --git a/backend/src/helpers/flashcard-processor.ts b/backend/src/helpers/flashcard-processor.ts
index 2b48628..f2b1149 100644
--- a/backend/src/helpers/flashcard-processor.ts
+++ b/backend/src/helpers/flashcard-processor.ts
@@ -60,7 +60,8 @@ export class FlashcardProcessor {
     messages: ConversationMessage[],
     count: number = 1,
     userContext?: UserContextInterface,
-    languageCodeOverride?: string
+    languageCodeOverride?: string,
+    forcedWord?: string
   ): Promise<Flashcard[]> {
     const executor = getFlashcardGraph();
 
@@ -81,7 +82,8 @@ export class FlashcardProcessor {
           messages,
           userContext,
           effectiveLanguageCode,
-          effectiveLanguageConfig
+          effectiveLanguageConfig,
+          forcedWord
         )
       );
     }
@@ -109,7 +111,8 @@ export class FlashcardProcessor {
     messages: ConversationMessage[],
     userContext?: UserContextInterface,
     languageCode?: string,
-    languageConfig?: LanguageConfig
+    languageConfig?: LanguageConfig,
+    forcedWord?: string
   ): Promise<Flashcard> {
     // Use explicitly passed language (snapshotted at trigger time) to avoid
     // reading from mutable this.languageCode which may change during async work
@@ -117,7 +120,7 @@ export class FlashcardProcessor {
     const effectiveLanguageConfig = languageConfig || this.languageConfig;
 
     try {
-      const input = {
+      const input: Record<string, unknown> = {
         studentName: 'Student',
         teacherName: effectiveLanguageConfig.teacherPersona.name,
         target_language: effectiveLanguageConfig.name,
@@ -125,6 +128,9 @@ export class FlashcardProcessor {
         messages: messages,
         flashcards: this.existingFlashcards,
       };
+      if (forcedWord) {
+        input.forced_word = forcedWord;
+      }
 
       let executionResult;
       try {
diff --git a/backend/src/prompts/flashcard.njk b/backend/src/prompts/flashcard.njk
index 79c3ac1..1fab8e6 100644
--- a/backend/src/prompts/flashcard.njk
+++ b/backend/src/prompts/flashcard.njk
@@ -1,6 +1,10 @@
 You are a system that generates flashcards for interesting new vocabulary for a {{target_language}} learning app.
 
+{% if forced_word %}
+Create a flashcard for the word/phrase "{{forced_word}}" as used in the conversation between {{studentName}} and {{teacherName}}. Generate the following:
+{% else %}
 Based on the ongoing conversation between {{studentName}} and {{teacherName}}, generate one flashcard with the following things:
+{% endif %}
 
 - The word in {{target_language}}
 - The translation in English
@@ -22,9 +26,14 @@ Based on the ongoing conversation between {{studentName}} and {{teacherName}}, g
 
 ## Guidelines
 
+{% if forced_word %}
+- The flashcard MUST be for "{{forced_word}}" — do not pick a different word
+- If the word appears in the conversation, use that context for the example sentence
+{% else %}
 - The word must be related to the topics used in the conversation
 - The word should be useful to the learner so they can continue the conversation with new vocabulary
 - Avoid cognates
+{% endif %}
 {% if language_code == "zh" %}- Include accurate pinyin with tone marks (e.g. "nǐ hǎo") for the word and the example sentence{% endif %}
 
 Now, return JSON with the following format:
diff --git a/backend/src/services/websocket-handler.ts b/backend/src/services/websocket-handler.ts
index 6115d6c..df404c0 100644
--- a/backend/src/services/websocket-handler.ts
+++ b/backend/src/services/websocket-handler.ts
@@ -279,6 +279,8 @@ function handleMessage(
       handleTextMessage(connectionId, ws, connectionManager, message);
     } else if (message.type === 'tts_pronounce_request') {
       handleTTSPronounce(connectionId, ws, message);
+    } else if (message.type === 'create_flashcard_request') {
+      handleCreateFlashcardRequest(connectionId, ws, connectionManager, message);
     } else {
       logger.debug(
         { connectionId, messageType: message.type },
@@ -630,7 +632,7 @@ async function handleTTSPronounce(
     return;
   }
 
-  if (text.length > 100) {
+  if (text.length > 500) {
     logger.warn(
       { connectionId, length: text.length },
       'tts_pronounce_text_too_long'
@@ -697,3 +699,112 @@ async function handleTTSPronounce(
     );
   }
 }
+
+/**
+ * Handle a `create_flashcard_request`: generate one flashcard for the word or
+ * phrase the client selected and send the result back over the socket.
+ *
+ * Replies with `flashcards_generated` on success, or `create_flashcard_error`
+ * when the word is missing, not a string or too long, when no flashcard
+ * processor is registered for the connection, or when generation fails or
+ * returns nothing. The caller does not await this handler, so failures are
+ * turned into a log entry and an error reply rather than a rejected promise.
+ *
+ * @param connectionId - Key for the connection's flashcard processor and attributes
+ * @param ws - Client socket the reply is written to
+ * @param connectionManager - Supplies the language code, conversation id and messages
+ * @param message - Parsed client payload; only `word` is read
+ */
+async function handleCreateFlashcardRequest(
+  connectionId: string,
+  ws: WebSocket,
+  connectionManager: ConnectionManager,
+  message: { word?: string }
+): Promise<void> {
+  // Generation is awaited below, so the client may be gone by the time we
+  // reply; route every reply through this guard instead of calling ws.send.
+  const reply = (payload: Record<string, unknown>): void => {
+    if (ws.readyState === WebSocket.OPEN) {
+      ws.send(JSON.stringify(payload));
+    }
+  };
+  const replyError = (error: string): void =>
+    reply({ type: 'create_flashcard_error', error });
+
+  // The payload is untrusted JSON: `word` may be absent or not a string.
+  const rawWord: unknown = message.word;
+  const word = typeof rawWord === 'string' ? rawWord.trim() : '';
+  if (!word) {
+    replyError('No word provided');
+    return;
+  }
+
+  // The word is interpolated into the generation prompt, so cap its length
+  // (same 500-character limit as tts_pronounce_request).
+  if (word.length > 500) {
+    logger.warn(
+      { connectionId, length: word.length },
+      'create_flashcard_word_too_long'
+    );
+    replyError('Word too long');
+    return;
+  }
+
+  const flashcardProcessor = flashcardProcessors.get(connectionId);
+  if (!flashcardProcessor) {
+    replyError('No flashcard processor');
+    return;
+  }
+
+  try {
+    // Read connection state before awaiting so the reply is tagged with the
+    // conversation that was active when the request arrived.
+    const languageCode =
+      connectionManager.getLanguageCode() || DEFAULT_LANGUAGE_CODE;
+    const conversationId = connectionManager.getConversationId();
+    const recentMessages = connectionManager
+      .getConversationState()
+      .messages.slice(-10)
+      .map((m) => ({ role: m.role, content: m.content }));
+
+    logger.info(
+      { connectionId, word, languageCode },
+      'create_flashcard_request'
+    );
+
+    const attrs = connectionAttributes.get(connectionId) || {};
+    const userContext = {
+      attributes: { timezone: attrs.timezone || '' },
+      targetingKey: attrs.userId || connectionId,
+    };
+
+    const flashcards = await flashcardProcessor.generateFlashcards(
+      recentMessages,
+      1,
+      userContext,
+      languageCode,
+      word
+    );
+
+    if (flashcards.length === 0) {
+      replyError('Failed to generate flashcard');
+      return;
+    }
+
+    reply({
+      type: 'flashcards_generated',
+      flashcards,
+      conversationId: conversationId || null,
+    });
+    logger.info(
+      { connectionId, word, targetWord: flashcards[0].targetWord },
+      'flashcard_created_for_word'
+    );
+  } catch (error) {
+    logger.error(
+      { err: error, connectionId, word },
+      'create_flashcard_request_error'
+    );
+    replyError('Failed to generate flashcard');
+  }
+}
diff --git a/frontend/src/components/ChatSection.tsx b/frontend/src/components/ChatSection.tsx
index db6aefa..b8b5a19 100644
--- a/frontend/src/components/ChatSection.tsx
+++ b/frontend/src/components/ChatSection.tsx
@@ -11,8 +11,14 @@ import { Message } from './Message';
 import { StreamingMessage } from './StreamingMessage';
 
 export function ChatSection() {
-  const { state, toggleRecording, sendTextMessage } = useApp();
+  const { state, toggleRecording, sendTextMessage, createFlashcardForWord } =
+    useApp();
   const [textInput, setTextInput] = useState('');
+  const [contextMenu, setContextMenu] = useState<{
+    x: number;
+    y: number;
+    word: string;
+  } | null>(null);
   const {
     chatHistory,
     currentTranscript,
@@ -104,6 +110,37 @@ export function ChatSection() {
     [textInput, sendTextMessage]
   );
 
+  const handleContextMenu = useCallback(
+    (e: React.MouseEvent) => {
+      const selection = window.getSelection();
+      const selectedText = selection?.toString().trim();
+      if (!selectedText) return;
+
+      e.preventDefault();
+      setContextMenu({ x: e.clientX, y: e.clientY, word: selectedText });
+    },
+    []
+  );
+
+  const handleCreateFlashcard = useCallback(() => {
+    if (contextMenu?.word) {
+      createFlashcardForWord(contextMenu.word);
+    }
+    setContextMenu(null);
+  }, [contextMenu, createFlashcardForWord]);
+
+  // Close context menu on click anywhere or scroll
+  useEffect(() => {
+    if (!contextMenu) return;
+    const close = () => setContextMenu(null);
+    window.addEventListener('click', close);
+    window.addEventListener('scroll', close, true);
+    return () => {
+      window.removeEventListener('click', close);
+      window.removeEventListener('scroll', close, true);
+    };
+  }, [contextMenu]);
+
   const isConnected = connectionStatus === 'connected';
 
   return (
@@ -134,7 +171,12 @@ export function ChatSection() {
         </button>
       </div>
       <div className="chat-container">
-        <div className="messages" id="messages" ref={messagesContainerRef}>
+        <div
+          className="messages"
+          id="messages"
+          ref={messagesContainerRef}
+          onContextMenu={handleContextMenu}
+        >
           {/* Loading overlay when not connected */}
           {connectionStatus === 'connecting' && (
             <div className="chat-loading">
@@ -223,6 +265,22 @@ export function ChatSection() {
           </button>
         </form>
       </div>
+      {contextMenu && isConnected && (
+        <div
+          className="flashcard-context-menu"
+          style={{ top: contextMenu.y, left: contextMenu.x }}
+          onClick={handleCreateFlashcard}
+        >
+          <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
+            <rect x="2" y="3" width="20" height="18" rx="2" />
+            <line x1="12" y1="8" x2="12" y2="16" />
+            <line x1="8" y1="12" x2="16" y2="12" />
+          </svg>
+          Create flashcard for &ldquo;{contextMenu.word.length > 30
+            ? contextMenu.word.slice(0, 30) + '…'
+            : contextMenu.word}&rdquo;
+        </div>
+      )}
     </section>
   );
 }
diff --git a/frontend/src/context/AppContext.tsx b/frontend/src/context/AppContext.tsx
index f5cea64..071acaa 100644
--- a/frontend/src/context/AppContext.tsx
+++ b/frontend/src/context/AppContext.tsx
@@ -261,6 +261,7 @@ interface AppContextType {
   handleInterrupt: () => void;
   sendTextMessage: (text: string) => void;
   pronounceWord: (text: string) => void;
+  createFlashcardForWord: (word: string) => void;
   // Conversation actions
   selectConversation: (conversationId: string) => void;
   createNewConversation: () => void;
@@ -1332,6 +1333,21 @@ export function AppProvider({ children }: AppProviderProps) {
     [state.connectionStatus]
   );
 
+  // Request flashcard generation for a specific word
+  const createFlashcardForWord = useCallback(
+    (word: string) => {
+      const wsClient = wsClientRef.current;
+      const trimmed = word.trim();
+      if (state.connectionStatus !== 'connected' || !trimmed) return;
+
+      wsClient.send({
+        type: 'create_flashcard_request',
+        word: trimmed,
+      });
+    },
+    [state.connectionStatus]
+  );
+
   // Select a conversation from the sidebar
   const selectConversation = useCallback(
     (conversationId: string) => {
@@ -1673,6 +1689,7 @@ export function AppProvider({ children }: AppProviderProps) {
       handleInterrupt,
       sendTextMessage,
       pronounceWord,
+      createFlashcardForWord,
       selectConversation,
       createNewConversation,
       deleteConversation,
@@ -1691,6 +1708,7 @@ export function AppProvider({ children }: AppProviderProps) {
       handleInterrupt,
       sendTextMessage,
       pronounceWord,
+      createFlashcardForWord,
       selectConversation,
       createNewConversation,
       deleteConversation,
diff --git a/frontend/src/styles/main.css b/frontend/src/styles/main.css
index 6354d96..8c3f57a 100644
--- a/frontend/src/styles/main.css
+++ b/frontend/src/styles/main.css
@@ -1322,6 +1322,48 @@ body {
     cursor: not-allowed;
 }
 
+/* Flashcard context menu (right-click to create flashcard) */
+.flashcard-context-menu {
+    position: fixed;
+    z-index: 1000;
+    background: #ffffff;
+    border: 1px solid #e5e5e5;
+    border-radius: 8px;
+    padding: 8px 14px;
+    font-size: 14px;
+    color: #374151;
+    cursor: pointer;
+    box-shadow: 0 4px 16px rgba(0, 0, 0, 0.12);
+    display: flex;
+    align-items: center;
+    gap: 8px;
+    white-space: nowrap;
+    user-select: none;
+    animation: context-menu-in 0.1s ease-out;
+}
+
+.flashcard-context-menu:hover {
+    background: #f3f4f6;
+    color: #1a1a1a;
+}
+
+.flashcard-context-menu svg {
+    width: 16px;
+    height: 16px;
+    flex-shrink: 0;
+}
+
+@keyframes context-menu-in {
+    from {
+        opacity: 0;
+        transform: scale(0.95);
+    }
+    to {
+        opacity: 1;
+        transform: scale(1);
+    }
+}
+
 /* Flashcards Section */
 .flashcards-section {
     background: #ffffff;

From c217591847e87e605368f7a8705d83e1f70bfc50 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 17:53:41 -0800
Subject: [PATCH 07/16] chore: add pretty logs option for prod

---
 backend/src/utils/logger.ts | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/backend/src/utils/logger.ts b/backend/src/utils/logger.ts
index 3403cb8..40e2a33 100644
--- a/backend/src/utils/logger.ts
+++ b/backend/src/utils/logger.ts
@@ -10,7 +10,8 @@
 
 import pino from 'pino';
 
-const isDevelopment = process.env.NODE_ENV !== 'production';
+const usePrettyLogs =
+  process.env.LOG_PRETTY === 'true' || process.env.NODE_ENV !== 'production';
 
 /**
  * Root logger instance
@@ -18,7 +19,7 @@ const isDevelopment = process.env.NODE_ENV !== 'production';
  */
 export const logger = pino({
   level: process.env.LOG_LEVEL || 'info',
-  transport: isDevelopment
+  transport: usePrettyLogs
     ? {
         target: 'pino-pretty',
         options: {

From 931411d78a1d156289a9f983df44ac3c97465512 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 18:10:07 -0800
Subject: [PATCH 08/16] fix: align github/render icons better

---
 frontend/src/styles/main.css | 1 +
 1 file changed, 1 insertion(+)

diff --git a/frontend/src/styles/main.css b/frontend/src/styles/main.css
index 8c3f57a..1976019 100644
--- a/frontend/src/styles/main.css
+++ b/frontend/src/styles/main.css
@@ -1819,6 +1819,7 @@ body {
     letter-spacing: 0.02em;
     width: 100%;
     box-sizing: border-box;
+    justify-content: flex-start;
 }
 
 @media (max-width: 768px) {

From 45f28b9fcc97d3fed618818ecb7cf4fc0bb3477c Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 18:33:46 -0800
Subject: [PATCH 09/16] fix: better audio setup for mobile

---
 frontend/src/App.tsx                 |  5 +--
 frontend/src/context/AppContext.tsx  |  7 ++--
 frontend/src/services/AudioPlayer.ts | 50 +++++++++++++++++++++++++---
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index dd39db8..94528a3 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -34,8 +34,9 @@ function AppContent() {
               </div>
             </div>
           </main>
-          {/* Hidden audio element for iOS compatibility */}
-          <audio id="iosAudioElement" style={{ display: 'none' }} playsInline />
+          {/* Hidden audio elements for TTS playback -- routed through here so browser AEC can cancel echo */}
+          <audio id="ttsAudioOutput" style={{ display: 'none' }} playsInline />
+          <audio id="ttsAudioOutputFlashcard" style={{ display: 'none' }} playsInline />
         </div>
       </div>
       {/* Floating Action Buttons */}
diff --git a/frontend/src/context/AppContext.tsx b/frontend/src/context/AppContext.tsx
index 071acaa..5656f8e 100644
--- a/frontend/src/context/AppContext.tsx
+++ b/frontend/src/context/AppContext.tsx
@@ -289,9 +289,12 @@ export function AppProvider({ children }: AppProviderProps) {
   const wsClientRef = useRef(wsClientInstance);
   const audioHandlerInstance = useMemo(() => new AudioHandler(), []);
   const audioHandlerRef = useRef(audioHandlerInstance);
-  const audioPlayerInstance = useMemo(() => new AudioPlayer(), []);
+  const audioPlayerInstance = useMemo(() => new AudioPlayer('ttsAudioOutput'), []);
   const audioPlayerRef = useRef(audioPlayerInstance);
-  const ttsAudioPlayerInstance = useMemo(() => new AudioPlayer(), []);
+  const ttsAudioPlayerInstance = useMemo(
+    () => new AudioPlayer('ttsAudioOutputFlashcard'),
+    []
+  );
   const ttsAudioPlayerRef = useRef(ttsAudioPlayerInstance);
   const hasMigratedRef = useRef(false);
   const conversationsLoadedRef = useRef(false);
diff --git a/frontend/src/services/AudioPlayer.ts b/frontend/src/services/AudioPlayer.ts
index 61a377d..d13de83 100644
--- a/frontend/src/services/AudioPlayer.ts
+++ b/frontend/src/services/AudioPlayer.ts
@@ -15,10 +15,14 @@ export class AudioPlayer {
   private nextStartTime: number = 0;
   private scheduledSources: AudioBufferSourceNode[] = [];
   private scheduleInterval: ReturnType<typeof setInterval> | null = null;
-  private readonly SCHEDULE_AHEAD_TIME = 0.1; // Look 100ms ahead
-  private readonly FADE_SAMPLES = 128; // ~2.7ms at 48kHz, ~8ms at 16kHz
-
-  constructor() {
+  private readonly SCHEDULE_AHEAD_TIME = 0.3; // Schedule 300ms ahead for mobile timer resilience
+  private readonly FADE_SAMPLES = 256; // ~11ms at 22050Hz TTS rate
+  private mediaStreamDest: MediaStreamAudioDestinationNode | null = null;
+  private audioElement: HTMLAudioElement | null = null;
+  private audioElementId: string;
+
+  constructor(audioElementId: string = 'ttsAudioOutput') {
+    this.audioElementId = audioElementId;
     this.isIOS =
       /iPad|iPhone|iPod/.test(navigator.userAgent) ||
       (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
@@ -55,6 +59,31 @@ export class AudioPlayer {
         await this.audioContext.resume();
       }
 
+      // Route through <audio> element so browser AEC has a reference signal.
+      // This lets mobile Safari/Chrome cancel echo from TTS playback while
+      // keeping the mic hot for user interruption.
+      try {
+        this.mediaStreamDest =
+          this.audioContext.createMediaStreamDestination();
+        this.audioElement = document.getElementById(
+          this.audioElementId
+        ) as HTMLAudioElement | null;
+        if (this.audioElement) {
+          this.audioElement.srcObject = this.mediaStreamDest.stream;
+          this.audioElement.play().catch(() => {});
+        }
+        console.log(
+          '[AudioPlayer] AEC routing enabled via MediaStreamDestination'
+        );
+      } catch (aecError) {
+        console.warn(
+          '[AudioPlayer] MediaStreamDestination unavailable, using direct output',
+          aecError
+        );
+        this.mediaStreamDest = null;
+        this.audioElement = null;
+      }
+
       console.log(
         'Audio player initialized with sample rate:',
         this.audioContext.sampleRate
@@ -106,6 +135,11 @@ export class AudioPlayer {
       await this.initialize();
     }
 
+    // Ensure <audio> element is playing (may have been blocked by autoplay policy earlier)
+    if (this.audioElement && this.audioElement.paused) {
+      this.audioElement.play().catch(() => {});
+    }
+
     try {
       // Decode base64 to binary
       const binaryString = atob(base64Audio);
@@ -251,7 +285,7 @@ export class AudioPlayer {
 
     const source = this.audioContext.createBufferSource();
     source.buffer = audioBuffer;
-    source.connect(this.audioContext.destination);
+    source.connect(this.mediaStreamDest ?? this.audioContext.destination);
 
     this.scheduledSources.push(source);
 
@@ -353,6 +387,12 @@ export class AudioPlayer {
     this.stop();
     this.stopScheduleInterval();
 
+    if (this.audioElement) {
+      this.audioElement.srcObject = null;
+      this.audioElement = null;
+    }
+    this.mediaStreamDest = null;
+
     if (this.audioContext) {
       this.audioContext.close();
       this.audioContext = null;

From 25b1d057432941fb3cee44fbe91f5f922a63118a Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 18:47:11 -0800
Subject: [PATCH 10/16] fix: add echo gate on mobile

---
 frontend/src/App.tsx                  |  5 +--
 frontend/src/context/AppContext.tsx   | 55 ++++++++++++++++++++++++---
 frontend/src/services/AudioHandler.ts | 25 ++++++++++--
 frontend/src/services/AudioPlayer.ts  | 44 +--------------------
 4 files changed, 76 insertions(+), 53 deletions(-)

diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index 94528a3..dd39db8 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -34,9 +34,8 @@ function AppContent() {
               </div>
             </div>
           </main>
-          {/* Hidden audio elements for TTS playback -- routed through here so browser AEC can cancel echo */}
-          <audio id="ttsAudioOutput" style={{ display: 'none' }} playsInline />
-          <audio id="ttsAudioOutputFlashcard" style={{ display: 'none' }} playsInline />
+          {/* Hidden audio element for iOS compatibility */}
+          <audio id="iosAudioElement" style={{ display: 'none' }} playsInline />
         </div>
       </div>
       {/* Floating Action Buttons */}
diff --git a/frontend/src/context/AppContext.tsx b/frontend/src/context/AppContext.tsx
index 5656f8e..db022e2 100644
--- a/frontend/src/context/AppContext.tsx
+++ b/frontend/src/context/AppContext.tsx
@@ -289,12 +289,9 @@ export function AppProvider({ children }: AppProviderProps) {
   const wsClientRef = useRef(wsClientInstance);
   const audioHandlerInstance = useMemo(() => new AudioHandler(), []);
   const audioHandlerRef = useRef(audioHandlerInstance);
-  const audioPlayerInstance = useMemo(() => new AudioPlayer('ttsAudioOutput'), []);
+  const audioPlayerInstance = useMemo(() => new AudioPlayer(), []);
   const audioPlayerRef = useRef(audioPlayerInstance);
-  const ttsAudioPlayerInstance = useMemo(
-    () => new AudioPlayer('ttsAudioOutputFlashcard'),
-    []
-  );
+  const ttsAudioPlayerInstance = useMemo(() => new AudioPlayer(), []);
   const ttsAudioPlayerRef = useRef(ttsAudioPlayerInstance);
   const hasMigratedRef = useRef(false);
   const conversationsLoadedRef = useRef(false);
@@ -407,6 +404,54 @@ export function AppProvider({ children }: AppProviderProps) {
     };
   }, []);
 
+  // Echo gate: mute mic while TTS is playing to prevent speaker output
+  // from being picked up as user speech on mobile
+  useEffect(() => {
+    const audioPlayer = audioPlayerRef.current;
+    const ttsAudioPlayer = ttsAudioPlayerRef.current;
+    const audioHandler = audioHandlerRef.current;
+
+    let mainPlaying = false;
+    let ttsPlaying = false;
+
+    const updateMuteState = () => {
+      if (mainPlaying || ttsPlaying) {
+        audioHandler.mute();
+      } else {
+        audioHandler.unmute();
+      }
+    };
+
+    const onMainStarted = () => {
+      mainPlaying = true;
+      updateMuteState();
+    };
+    const onMainDone = () => {
+      mainPlaying = false;
+      updateMuteState();
+    };
+    const onTtsStarted = () => {
+      ttsPlaying = true;
+      updateMuteState();
+    };
+    const onTtsDone = () => {
+      ttsPlaying = false;
+      updateMuteState();
+    };
+
+    audioPlayer.on('playback_started', onMainStarted);
+    audioPlayer.on('playback_finished', onMainDone);
+    audioPlayer.on('playback_stopped', onMainDone);
+
+    ttsAudioPlayer.on('playback_started', onTtsStarted);
+    ttsAudioPlayer.on('playback_finished', onTtsDone);
+    ttsAudioPlayer.on('playback_stopped', onTtsDone);
+
+    return () => {
+      audioHandler.unmute();
+    };
+  }, []);
+
   // Load initial state (conversations across all languages)
   // Only run if Supabase sync hasn't already loaded conversations
   useEffect(() => {
diff --git a/frontend/src/services/AudioHandler.ts b/frontend/src/services/AudioHandler.ts
index 1c0a51f..a1105be 100644
--- a/frontend/src/services/AudioHandler.ts
+++ b/frontend/src/services/AudioHandler.ts
@@ -9,6 +9,7 @@ export class AudioHandler {
   private stream: MediaStream | null = null;
   private microphone: MediaStreamAudioSourceNode | null = null;
   private isStreaming = false;
+  private isMuted = false;
   private listeners = new Map<string, EventCallback[]>();
   private isIOS: boolean;
   private iosHandler: IOSAudioHandler | null;
@@ -81,7 +82,7 @@ export class AudioHandler {
         await this.iosHandler.unlockAudioContext?.();
 
         const success = await this.iosHandler.startMicrophone?.((audioData) => {
-          if (this.isStreaming) {
+          if (this.isStreaming && !this.isMuted) {
             this.emit('audioChunk', audioData);
           }
         });
@@ -154,7 +155,7 @@ export class AudioHandler {
       );
 
       this.workletNode.port.onmessage = (event: MessageEvent) => {
-        if (this.isStreaming) {
+        if (this.isStreaming && !this.isMuted) {
           const int16Buffer = event.data as ArrayBuffer;
           const base64Audio = btoa(
             String.fromCharCode(...new Uint8Array(int16Buffer))
@@ -189,7 +190,7 @@ export class AudioHandler {
     let buffer: Float32Array | null = null;
 
     this.scriptProcessor.onaudioprocess = (event: AudioProcessingEvent) => {
-      if (this.isStreaming) {
+      if (this.isStreaming && !this.isMuted) {
         const inputData = event.inputBuffer.getChannelData(0);
 
         // Append new data to the buffer
@@ -288,4 +289,22 @@ export class AudioHandler {
   getIsStreaming(): boolean {
     return this.isStreaming;
   }
+
+  mute(): void {
+    if (!this.isMuted) {
+      this.isMuted = true;
+      console.log('[AudioHandler] Muted — suppressing audio chunks during TTS playback');
+    }
+  }
+
+  unmute(): void {
+    if (this.isMuted) {
+      this.isMuted = false;
+      console.log('[AudioHandler] Unmuted — resuming audio chunk emission');
+    }
+  }
+
+  getIsMuted(): boolean {
+    return this.isMuted;
+  }
 }
diff --git a/frontend/src/services/AudioPlayer.ts b/frontend/src/services/AudioPlayer.ts
index d13de83..0910757 100644
--- a/frontend/src/services/AudioPlayer.ts
+++ b/frontend/src/services/AudioPlayer.ts
@@ -17,12 +17,8 @@ export class AudioPlayer {
   private scheduleInterval: ReturnType<typeof setInterval> | null = null;
   private readonly SCHEDULE_AHEAD_TIME = 0.3; // Schedule 300ms ahead for mobile timer resilience
   private readonly FADE_SAMPLES = 256; // ~11ms at 22050Hz TTS rate
-  private mediaStreamDest: MediaStreamAudioDestinationNode | null = null;
-  private audioElement: HTMLAudioElement | null = null;
-  private audioElementId: string;
 
-  constructor(audioElementId: string = 'ttsAudioOutput') {
-    this.audioElementId = audioElementId;
+  constructor() {
     this.isIOS =
       /iPad|iPhone|iPod/.test(navigator.userAgent) ||
       (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
@@ -59,31 +55,6 @@ export class AudioPlayer {
         await this.audioContext.resume();
       }
 
-      // Route through <audio> element so browser AEC has a reference signal.
-      // This lets mobile Safari/Chrome cancel echo from TTS playback while
-      // keeping the mic hot for user interruption.
-      try {
-        this.mediaStreamDest =
-          this.audioContext.createMediaStreamDestination();
-        this.audioElement = document.getElementById(
-          this.audioElementId
-        ) as HTMLAudioElement | null;
-        if (this.audioElement) {
-          this.audioElement.srcObject = this.mediaStreamDest.stream;
-          this.audioElement.play().catch(() => {});
-        }
-        console.log(
-          '[AudioPlayer] AEC routing enabled via MediaStreamDestination'
-        );
-      } catch (aecError) {
-        console.warn(
-          '[AudioPlayer] MediaStreamDestination unavailable, using direct output',
-          aecError
-        );
-        this.mediaStreamDest = null;
-        this.audioElement = null;
-      }
-
       console.log(
         'Audio player initialized with sample rate:',
         this.audioContext.sampleRate
@@ -135,11 +106,6 @@ export class AudioPlayer {
       await this.initialize();
     }
 
-    // Ensure <audio> element is playing (may have been blocked by autoplay policy earlier)
-    if (this.audioElement && this.audioElement.paused) {
-      this.audioElement.play().catch(() => {});
-    }
-
     try {
       // Decode base64 to binary
       const binaryString = atob(base64Audio);
@@ -285,7 +251,7 @@ export class AudioPlayer {
 
     const source = this.audioContext.createBufferSource();
     source.buffer = audioBuffer;
-    source.connect(this.mediaStreamDest ?? this.audioContext.destination);
+    source.connect(this.audioContext.destination);
 
     this.scheduledSources.push(source);
 
@@ -387,12 +353,6 @@ export class AudioPlayer {
     this.stop();
     this.stopScheduleInterval();
 
-    if (this.audioElement) {
-      this.audioElement.srcObject = null;
-      this.audioElement = null;
-    }
-    this.mediaStreamDest = null;
-
     if (this.audioContext) {
       this.audioContext.close();
       this.audioContext = null;

From 2ef3f7b5e3e36b502f1406f062f090ae842e1ade Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 19:52:17 -0800
Subject: [PATCH 11/16] fix: hide render and github icons on mobile

---
 frontend/src/components/Sidebar.tsx | 36 +++++++++++++++++++++++++++++
 frontend/src/context/AppContext.tsx | 10 ++++++--
 frontend/src/styles/main.css        | 34 +++++++++++++++++++++++++--
 3 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index dd9b9bd..f13b32f 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -255,6 +255,42 @@ export function Sidebar() {
             </ul>
           )}
         </div>
+
+        <div className="sidebar-footer">
+          <a
+            href="https://render.com/deploy?repo=https://github.com/inworld-ai/language-learning-node"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="sidebar-footer-link"
+          >
+            <svg
+              width="16"
+              height="16"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              strokeWidth="2"
+              strokeLinecap="round"
+              strokeLinejoin="round"
+            >
+              <path d="M12 2L2 7l10 5 10-5-10-5z" />
+              <path d="M2 17l10 5 10-5" />
+              <path d="M2 12l10 5 10-5" />
+            </svg>
+            <span>Deploy on Render</span>
+          </a>
+          <a
+            href="https://github.com/inworld-ai/language-learning-node"
+            target="_blank"
+            rel="noopener noreferrer"
+            className="sidebar-footer-link"
+          >
+            <svg width="16" height="16" viewBox="0 0 24 24" fill="currentColor">
+              <path d="M12 0C5.37 0 0 5.37 0 12c0 5.31 3.435 9.795 8.205 11.385.6.105.825-.255.825-.57 0-.285-.015-1.23-.015-2.235-3.015.555-3.795-.735-4.035-1.41-.135-.345-.72-1.41-1.23-1.695-.42-.225-1.02-.78-.015-.795.945-.015 1.62.87 1.845 1.23 1.08 1.815 2.805 1.305 3.495.99.105-.78.42-1.305.765-1.605-2.67-.3-5.46-1.335-5.46-5.925 0-1.305.465-2.385 1.23-3.225-.12-.3-.54-1.53.12-3.18 0 0 1.005-.315 3.3 1.23.96-.27 1.98-.405 3-.405s2.04.135 3 .405c2.295-1.56 3.3-1.23 3.3-1.23.66 1.65.24 2.88.12 3.18.765.84 1.23 1.905 1.23 3.225 0 4.605-2.805 5.625-5.475 5.925.435.375.81 1.095.81 2.22 0 1.605-.015 2.895-.015 3.3 0 .315.225.69.825.57A12.02 12.02 0 0 0 24 12c0-6.63-5.37-12-12-12z" />
+            </svg>
+            <span>GitHub</span>
+          </a>
+        </div>
       </aside>
     </>
   );
diff --git a/frontend/src/context/AppContext.tsx b/frontend/src/context/AppContext.tsx
index db022e2..11cab10 100644
--- a/frontend/src/context/AppContext.tsx
+++ b/frontend/src/context/AppContext.tsx
@@ -404,9 +404,15 @@ export function AppProvider({ children }: AppProviderProps) {
     };
   }, []);
 
-  // Echo gate: mute mic while TTS is playing to prevent speaker output
-  // from being picked up as user speech on mobile
+  // Echo gate (mobile only): mute mic while TTS is playing to prevent
+  // speaker output from being picked up as user speech. Desktop browsers
+  // have reliable echo cancellation so we leave interruption enabled there.
   useEffect(() => {
+    const isMobile =
+      /Android|iPhone|iPad|iPod/i.test(navigator.userAgent) ||
+      (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
+    if (!isMobile) return;
+
     const audioPlayer = audioPlayerRef.current;
     const ttsAudioPlayer = ttsAudioPlayerRef.current;
     const audioHandler = audioHandlerRef.current;
diff --git a/frontend/src/styles/main.css b/frontend/src/styles/main.css
index 1976019..8adc900 100644
--- a/frontend/src/styles/main.css
+++ b/frontend/src/styles/main.css
@@ -1822,10 +1822,40 @@ body {
     justify-content: flex-start;
 }
 
+/* Sidebar Footer Links (mobile only) */
+.sidebar-footer {
+    display: none;
+    flex-direction: column;
+    gap: 4px;
+    padding: 12px;
+    border-top: 1px solid #e5e5e5;
+}
+
+.sidebar-footer-link {
+    display: flex;
+    align-items: center;
+    gap: 10px;
+    padding: 10px 12px;
+    border-radius: 8px;
+    color: #4b5563;
+    text-decoration: none;
+    font-size: 13px;
+    font-weight: 500;
+    transition: background 0.15s ease, color 0.15s ease;
+}
+
+.sidebar-footer-link:hover {
+    background: #f0f0f0;
+    color: #1a1a1a;
+}
+
 @media (max-width: 768px) {
     .floating-buttons {
-        bottom: 16px;
-        right: 16px;
+        display: none;
+    }
+
+    .sidebar-footer {
+        display: flex;
     }
 }
 

From 60210b49022dc7feb5bd4809fbbf56ef8301a987 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 20:00:04 -0800
Subject: [PATCH 12/16] feat: update app and readme to support assembly or
 soniox

---
 CONTRIBUTING.md                       |  3 ++
 README.md                             | 43 ++++++++++++++++-----------
 backend/.env.example                  |  4 +--
 backend/src/config/server.ts          | 12 +++-----
 backend/src/server.ts                 | 10 ++++---
 backend/src/services/graph-service.ts | 30 +++++++------------
 frontend/README.md                    |  2 +-
 render.yaml                           |  2 --
 8 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 51f0e4c..2d1d8a2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -33,7 +33,10 @@ Thank you for your interest in contributing to the Language Learning App! This d
 
    ```bash
    INWORLD_API_KEY=your_api_key_here
+   # Set one of these:
    ASSEMBLY_AI_API_KEY=your_api_key_here
+   # or
+   SONIOX_API_KEY=your_api_key_here
    ```
 
 5. **Verify the setup**:
diff --git a/README.md b/README.md
index 20af85d..44283db 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,7 @@ A conversational language learning app powered by Inworld AI Runtime. Practice s
 - Node.js (v20 or higher)
 - npm
 - An Inworld AI account and API key
-- An AssemblyAI account and API key (for speech-to-text)
+- An [AssemblyAI](https://www.assemblyai.com/) or [Soniox](https://soniox.com/) account and API key (for speech-to-text)
 
 ## Get Started
 
@@ -35,17 +35,24 @@ This installs dependencies for the root, backend, and frontend automatically.
 
 ### Step 3: Configure Environment Variables
 
-Create a `backend/.env` file:
+Create a `backend/.env` file with your Inworld key and **one** of the two STT provider keys:
 
 ```bash
 INWORLD_API_KEY=your_inworld_base64_key
+
+# Pick one STT provider:
 ASSEMBLY_AI_API_KEY=your_assemblyai_key
+# or
+SONIOX_API_KEY=your_soniox_key
 ```
 
-| Service        | Get Key From                                        | Purpose                           |
-| -------------- | --------------------------------------------------- | --------------------------------- |
-| **Inworld**    | [platform.inworld.ai](https://platform.inworld.ai/) | AI conversations (Base64 API key) |
-| **AssemblyAI** | [assemblyai.com](https://www.assemblyai.com/)       | Speech-to-text                    |
+The server auto-detects which STT provider to use based on which API key is present. If both are set, Soniox takes priority.
+
+| Service        | Get Key From                                         | Purpose                           |
+| -------------- | ---------------------------------------------------- | --------------------------------- |
+| **Inworld**    | [platform.inworld.ai](https://platform.inworld.ai/)  | AI conversations (Base64 API key) |
+| **AssemblyAI** | [assemblyai.com](https://www.assemblyai.com/)         | Speech-to-text (option 1)         |
+| **Soniox**     | [soniox.com](https://soniox.com/)                     | Speech-to-text (option 2)         |
 
 ### Step 4: Run the Application
 
@@ -143,7 +150,7 @@ The app uses a real-time audio streaming architecture:
 
 1. **Frontend** captures microphone audio and streams it via WebSocket
 2. **Backend** processes audio through an Inworld Runtime graph:
-   - AssemblyAI handles speech-to-text with voice activity detection
+   - Speech-to-text with voice activity detection (AssemblyAI or Soniox)
    - LLM generates contextual responses in the target language
    - TTS converts responses back to audio
 3. **Flashcards** are auto-generated from conversation vocabulary
@@ -166,16 +173,18 @@ Without Supabase, the app works in anonymous mode using localStorage (no memory
 
 ## Environment Variables Reference
 
-| Variable                    | Required | Description                                                        |
-| --------------------------- | -------- | ------------------------------------------------------------------ |
-| `INWORLD_API_KEY`           | Yes      | Inworld AI Base64 API key                                          |
-| `ASSEMBLY_AI_API_KEY`       | Yes      | AssemblyAI API key                                                 |
-| `PORT`                      | No       | Server port (default: 3000)                                        |
-| `LOG_LEVEL`                 | No       | `trace`, `debug`, `info`, `warn`, `error`, `fatal` (default: info) |
-| `NODE_ENV`                  | No       | Set to `production` for production log format                      |
-| `ASSEMBLY_AI_EAGERNESS`     | No       | Turn detection: `low`, `medium`, `high` (default: high)            |
-| `SUPABASE_URL`              | No       | Supabase project URL (enables memory feature)                      |
-| `SUPABASE_SECRET_KEY`       | No       | Supabase secret key (for backend memory storage)                   |
+| Variable                    | Required           | Description                                                        |
+| --------------------------- | ------------------ | ------------------------------------------------------------------ |
+| `INWORLD_API_KEY`           | Yes                | Inworld AI Base64 API key                                          |
+| `ASSEMBLY_AI_API_KEY`       | One of these two ↕ | AssemblyAI API key                                                 |
+| `SONIOX_API_KEY`            | One of these two ↑ | Soniox API key (takes priority if both are set)                    |
+| `PORT`                      | No                 | Server port (default: 3000)                                        |
+| `LOG_LEVEL`                 | No                 | `trace`, `debug`, `info`, `warn`, `error`, `fatal` (default: info) |
+| `NODE_ENV`                  | No                 | Set to `production` for production log format                      |
+| `ASSEMBLY_AI_EAGERNESS`     | No                 | AssemblyAI turn detection: `low`, `medium`, `high` (default: high) |
+| `SONIOX_EAGERNESS`          | No                 | Soniox endpoint detection: `low`, `medium`, `high` (default: high) |
+| `SUPABASE_URL`              | No                 | Supabase project URL (enables memory feature)                      |
+| `SUPABASE_SECRET_KEY`       | No                 | Supabase secret key (for backend memory storage)                   |
 
 ## Testing
 
diff --git a/backend/.env.example b/backend/.env.example
index 0cfaf91..2366e67 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -1,8 +1,8 @@
 INWORLD_API_KEY=
 
-STT_PROVIDER=assembly
+# Speech-to-text: set ONE of these (Soniox takes priority if both are set)
 ASSEMBLY_AI_API_KEY=
 SONIOX_API_KEY=
 
 SUPABASE_URL=
-SUPABASE_SECRET_KEY=
\ No newline at end of file
+SUPABASE_SECRET_KEY=
diff --git a/backend/src/config/server.ts b/backend/src/config/server.ts
index dbb36fd..f296e0f 100644
--- a/backend/src/config/server.ts
+++ b/backend/src/config/server.ts
@@ -92,11 +92,6 @@ export const serverConfig = {
    */
   port: Number(process.env.PORT) || 3000,
 
-  /**
-   * STT provider selection ('assembly' | 'soniox')
-   */
-  sttProvider: (process.env.STT_PROVIDER || 'assembly') as STTProvider,
-
   /**
    * Audio processing settings
    */
@@ -154,11 +149,12 @@ export function getAssemblyAISettingsForEagerness(
 }
 
 /**
- * Get the active STT provider at call time (after dotenv loads).
- * Do NOT use serverConfig.sttProvider — it is evaluated at module load time before dotenv.
+ * Auto-detect the active STT provider based on which API key is configured.
+ * SONIOX_API_KEY takes priority if both keys are present.
  */
 export function getSttProvider(): STTProvider {
-  return (process.env.STT_PROVIDER || 'assembly') as STTProvider;
+  if (process.env.SONIOX_API_KEY) return 'soniox';
+  return 'assembly';
 }
 
 /**
diff --git a/backend/src/server.ts b/backend/src/server.ts
index 0b61b34..ed771ff 100644
--- a/backend/src/server.ts
+++ b/backend/src/server.ts
@@ -1,7 +1,7 @@
 /**
  * Language Learning Server - Inworld Runtime 0.9
  *
- * This server uses a long-running circular graph with AssemblyAI for VAD/STT.
+ * This server uses a long-running circular graph with AssemblyAI or Soniox for VAD/STT.
  * Key components:
  * - ConversationGraphWrapper: The main graph that processes audio → STT → LLM → TTS
  * - ConnectionManager: Manages WebSocket connections and feeds audio to the graph
@@ -20,7 +20,7 @@ import { fileURLToPath } from 'url';
 import { createServer } from 'http';
 import { WebSocketServer } from 'ws';
 
-import { serverConfig } from './config/server.js';
+import { serverConfig, getSttProvider } from './config/server.js';
 import { serverLogger as logger } from './utils/logger.js';
 
 // Import services
@@ -105,10 +105,12 @@ async function startServer(): Promise<void> {
   try {
     await initializeGraph();
     await exportGraphConfigs();
-    const sttProvider = process.env.STT_PROVIDER || 'assembly';
     server.listen(serverConfig.port, () => {
       logger.info({ port: serverConfig.port }, 'server_started');
-      logger.info({ sttProvider }, 'using_inworld_runtime_0.9_with_stt');
+      logger.info(
+        { sttProvider: getSttProvider() },
+        'using_inworld_runtime_0.9_with_stt'
+      );
     });
   } catch (error) {
     logger.fatal({ err: error }, 'server_start_failed');
diff --git a/backend/src/services/graph-service.ts b/backend/src/services/graph-service.ts
index f0cad43..79670f8 100644
--- a/backend/src/services/graph-service.ts
+++ b/backend/src/services/graph-service.ts
@@ -19,7 +19,7 @@ import { getResponseFeedbackGraph } from '../graphs/response-feedback-graph.js';
 import { initializeTTSGraphs } from '../graphs/simple-tts-graph.js';
 import { serverLogger as logger } from '../utils/logger.js';
 import { connections } from './state.js';
-import { STTProvider } from '../config/server.js';
+import { getSttProvider } from '../config/server.js';
 
 const __filename = fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
@@ -32,27 +32,19 @@ export function getGraphWrapper(): ConversationGraphWrapper | null {
 }
 
 export async function initializeGraph(): Promise<void> {
-  // Read STT_PROVIDER from process.env at call time (after dotenv has loaded),
-  // not from serverConfig which is evaluated at module load time before dotenv.
-  const sttProvider = (process.env.STT_PROVIDER || 'assembly') as STTProvider;
-  let sttApiKey: string;
+  const sonioxKey = process.env.SONIOX_API_KEY;
+  const assemblyKey = process.env.ASSEMBLY_AI_API_KEY;
 
-  if (sttProvider === 'soniox') {
-    sttApiKey = process.env.SONIOX_API_KEY || '';
-    if (!sttApiKey) {
-      throw new Error(
-        'SONIOX_API_KEY environment variable is required when STT_PROVIDER=soniox'
-      );
-    }
-  } else {
-    sttApiKey = process.env.ASSEMBLY_AI_API_KEY || '';
-    if (!sttApiKey) {
-      throw new Error(
-        'ASSEMBLY_AI_API_KEY environment variable is required when STT_PROVIDER=assembly'
-      );
-    }
+  if (!sonioxKey && !assemblyKey) {
+    throw new Error(
+      'No speech-to-text API key configured. ' +
+        'Set either SONIOX_API_KEY or ASSEMBLY_AI_API_KEY in your backend/.env file.'
+    );
   }
 
+  const sttProvider = getSttProvider();
+  const sttApiKey = sttProvider === 'soniox' ? sonioxKey! : assemblyKey!;
+
   logger.info({ sttProvider }, 'initializing_conversation_graph');
   graphWrapper = getConversationGraph({
     sttProvider,
diff --git a/frontend/README.md b/frontend/README.md
index 4c96e21..e7aaf98 100644
--- a/frontend/README.md
+++ b/frontend/README.md
@@ -10,7 +10,7 @@ React + TypeScript frontend for the Inworld Language Tutor application.
    npm install
    ```
 
-2. Create `.env.local` with your Supabase credentials (optional, for auth/sync):
+2. (Optional) Create `.env.local` with your Supabase credentials:
    ```bash
    VITE_SUPABASE_URL=https://YOUR_PROJECT.supabase.co
    VITE_SUPABASE_PUBLISHABLE_KEY=your_anon_key
diff --git a/render.yaml b/render.yaml
index 76f0d4e..9cfc18d 100644
--- a/render.yaml
+++ b/render.yaml
@@ -12,8 +12,6 @@ services:
         value: production
       - key: INWORLD_API_KEY
         sync: false
-      - key: STT_PROVIDER
-        value: assembly
       - key: ASSEMBLY_AI_API_KEY
         sync: false
       - key: SONIOX_API_KEY

From a812e53fca8c6a6cfcda45b2fed791842b5ab766 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Fri, 20 Feb 2026 21:30:46 -0800
Subject: [PATCH 13/16] feat: better flashcards with images

---
 README.md                                     |  13 ++
 backend/.env.example                          |   3 +
 backend/src/helpers/anki-exporter.ts          | 120 ++++++++++++----
 backend/src/helpers/audio-utils.ts            |  44 ++++++
 backend/src/helpers/image-generator.ts        | 133 ++++++++++++++++++
 backend/src/helpers/tts-audio-generator.ts    | 119 ++++++++++++++++
 backend/src/services/api-routes.ts            |  53 ++++++-
 frontend/src/App.tsx                          |   4 +-
 frontend/src/components/FlashcardsSection.tsx |  38 +++--
 frontend/src/components/Header.tsx            |   8 +-
 frontend/src/components/Sidebar.tsx           |   2 +-
 frontend/src/styles/main.css                  |  65 +++++++++
 12 files changed, 553 insertions(+), 49 deletions(-)
 create mode 100644 backend/src/helpers/image-generator.ts
 create mode 100644 backend/src/helpers/tts-audio-generator.ts

diff --git a/README.md b/README.md
index 44283db..11ddc8d 100644
--- a/README.md
+++ b/README.md
@@ -109,6 +109,18 @@ VITE_SUPABASE_PUBLISHABLE_KEY=your_anon_key
 
 Find these in: Supabase Dashboard > Settings > API
 
+### Step 6 (Optional): Enable Flashcard Images with Replicate
+
+When exporting flashcards to Anki, the app can generate a unique illustrative image for each vocabulary word using [Replicate](https://replicate.com/)'s FLUX Schnell model. Without this key, flashcards are exported with audio only.
+
+Add to `backend/.env`:
+
+```bash
+REPLICATE_API_TOKEN=your_replicate_api_token
+```
+
+Get a token at [replicate.com/account/api-tokens](https://replicate.com/account/api-tokens).
+
 ## Repo Structure
 
 ```
@@ -185,6 +197,7 @@ Without Supabase, the app works in anonymous mode using localStorage (no memory
 | `SONIOX_EAGERNESS`          | No                 | Soniox endpoint detection: `low`, `medium`, `high` (default: high) |
 | `SUPABASE_URL`              | No                 | Supabase project URL (enables memory feature)                      |
 | `SUPABASE_SECRET_KEY`       | No                 | Supabase secret key (for backend memory storage)                   |
+| `REPLICATE_API_TOKEN`       | No                 | Replicate API token (enables flashcard image generation)           |
 
 ## Testing
 
diff --git a/backend/.env.example b/backend/.env.example
index 2366e67..251c931 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -4,5 +4,8 @@ INWORLD_API_KEY=
 ASSEMBLY_AI_API_KEY=
 SONIOX_API_KEY=
 
+# Optional: generates images for Anki flashcards
+REPLICATE_API_TOKEN=
+
 SUPABASE_URL=
 SUPABASE_SECRET_KEY=
diff --git a/backend/src/helpers/anki-exporter.ts b/backend/src/helpers/anki-exporter.ts
index 793a3be..15b62bf 100644
--- a/backend/src/helpers/anki-exporter.ts
+++ b/backend/src/helpers/anki-exporter.ts
@@ -1,26 +1,41 @@
 // @ts-expect-error - no type definitions available for anki-apkg-export
 import AnkiExport from 'anki-apkg-export';
 import { Flashcard } from './flashcard-processor.js';
+import { GeneratedAudio } from './tts-audio-generator.js';
+import { GeneratedImage } from './image-generator.js';
 
 export class AnkiExporter {
   /**
    * Export flashcards to ANKI .apkg format
+   * @param audioMap - Optional map from targetWord to generated audio file info.
+   * @param imageMap - Optional map from targetWord to generated image file info.
    */
   async exportFlashcards(
     flashcards: Flashcard[],
-    deckName: string = 'Inworld Language Tutor Cards'
+    deckName: string = 'Inworld Language Tutor Cards',
+    audioMap?: Map<string, GeneratedAudio>,
+    imageMap?: Map<string, GeneratedImage>
   ): Promise<Buffer> {
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
     const apkg = new (AnkiExport as any).default(deckName);
 
-    // Add each flashcard as a card
+    if (audioMap) {
+      for (const [, audio] of audioMap) {
+        apkg.addMedia(audio.filename, audio.buffer);
+      }
+    }
+
+    if (imageMap) {
+      for (const [, image] of imageMap) {
+        apkg.addMedia(image.filename, image.buffer);
+      }
+    }
+
     flashcards.forEach((flashcard) => {
-      // Support both new 'targetWord' and legacy 'spanish' field
-      // @deprecated Legacy 'spanish' field support - remove when all data migrated
+      // @deprecated Legacy 'spanish' field support
       const targetWord =
         flashcard.targetWord || (flashcard as { spanish?: string }).spanish;
 
-      // Skip empty or error flashcards
       if (
         !targetWord ||
         !flashcard.english ||
@@ -30,16 +45,11 @@ export class AnkiExporter {
         return;
       }
 
-      let front = targetWord.trim();
-      if (flashcard.pinyin) {
-        front += `<br><span style="font-size: 14px; color: #888;">${this.escapeHtml(flashcard.pinyin)}</span>`;
-      }
-      const back = this.formatCardBack(flashcard);
+      const front = this.formatCardFront(flashcard, targetWord.trim(), audioMap);
+      const back = this.formatCardBack(flashcard, audioMap, imageMap);
 
-      // Add tags for organization
       const tags = ['inworld-language-tutor'];
 
-      // Add language tag if available
       if (flashcard.languageCode) {
         tags.push(`language-${flashcard.languageCode}`);
       }
@@ -52,39 +62,96 @@ export class AnkiExporter {
       apkg.addCard(front, back, { tags });
     });
 
-    // Generate and return the .apkg file as Buffer
     const zipBuffer = await apkg.save();
     return zipBuffer;
   }
 
+  /**
+   * Format the front of the card (target word + pinyin + audio)
+   */
+  private formatCardFront(
+    flashcard: Flashcard,
+    targetWord: string,
+    audioMap?: Map<string, GeneratedAudio>
+  ): string {
+    const audio = audioMap?.get(targetWord);
+
+    let html = `<div style="font-family: 'Noto Sans', 'Noto Sans SC', 'Noto Sans JP', 'Helvetica Neue', Arial, sans-serif; text-align: center; padding: 20px;">`;
+
+    html += `<div style="font-size: 42px; font-weight: 700; color: #1a1a2e; margin-bottom: 8px; line-height: 1.3;">${this.escapeHtml(targetWord)}</div>`;
+
+    if (flashcard.pinyin) {
+      html += `<div style="font-size: 18px; color: #6c757d; margin-bottom: 16px; letter-spacing: 0.5px; font-style: italic;">${this.escapeHtml(flashcard.pinyin)}</div>`;
+    }
+
+    if (audio) {
+      html += `<div style="margin: 12px 0;">[sound:${audio.filename}]</div>`;
+    }
+
+    html += `</div>`;
+    return html;
+  }
+
   /**
    * Format the back of the card with English, example, and mnemonic
    */
-  private formatCardBack(flashcard: Flashcard): string {
-    let back = `<div style="font-size: 18px; margin-bottom: 10px;">${this.escapeHtml(flashcard.english)}</div>`;
+  private formatCardBack(
+    flashcard: Flashcard,
+    audioMap?: Map<string, GeneratedAudio>,
+    imageMap?: Map<string, GeneratedImage>
+  ): string {
+    const targetWord = (
+      flashcard.targetWord ||
+      (flashcard as { spanish?: string }).spanish ||
+      ''
+    ).trim();
+
+    let html = `<div style="font-family: 'Noto Sans', 'Noto Sans SC', 'Noto Sans JP', 'Helvetica Neue', Arial, sans-serif; text-align: center; padding: 20px; max-width: 480px; margin: 0 auto;">`;
+
+    html += `<div style="font-size: 26px; font-weight: 600; color: #1a1a2e; margin-bottom: 20px; line-height: 1.4;">${this.escapeHtml(flashcard.english)}</div>`;
+
+    html += `<hr style="border: none; border-top: 1px solid #e0e0e0; margin: 0 0 20px 0;">`;
 
     if (flashcard.example && flashcard.example.trim()) {
-      let exampleHtml = this.escapeHtml(flashcard.example);
+      const sentenceAudio = audioMap?.get(flashcard.example.trim());
+
+      html += `<div style="background: linear-gradient(135deg, #f8f9ff 0%, #f0f4ff 100%); border-radius: 12px; padding: 16px 20px; margin-bottom: 16px; border-left: 4px solid #4a6cf7; text-align: left;">`;
+
+      html += `<div style="display: flex; align-items: flex-start; justify-content: space-between; gap: 8px;">`;
+      html += `<div style="flex: 1;">`;
+      html += `<div style="font-size: 17px; color: #2d3748; line-height: 1.6;">${this.escapeHtml(flashcard.example)}</div>`;
       if (flashcard.examplePinyin) {
-        exampleHtml += `<br><span style="font-size: 12px; color: #999;">${this.escapeHtml(flashcard.examplePinyin)}</span>`;
+        html += `<div style="font-size: 13px; color: #8e99a4; margin-top: 2px; font-style: italic;">${this.escapeHtml(flashcard.examplePinyin)}</div>`;
+      }
+      html += `</div>`;
+      if (sentenceAudio) {
+        html += `<div style="flex-shrink: 0; padding-top: 2px;">[sound:${sentenceAudio.filename}]</div>`;
       }
-      back += `<div style="font-size: 14px; color: #666; font-style: italic; margin: 10px 0; padding: 8px; background-color: #f5f5f5; border-left: 3px solid #2196F3;">${exampleHtml}</div>`;
+      html += `</div>`;
+
+      if (flashcard.exampleTranslation && flashcard.exampleTranslation.trim()) {
+        html += `<div style="font-size: 14px; color: #718096; margin-top: 8px; padding-top: 8px; border-top: 1px solid rgba(74, 108, 247, 0.15);">${this.escapeHtml(flashcard.exampleTranslation)}</div>`;
+      }
+
+      html += `</div>`;
     }
 
-    if (flashcard.exampleTranslation && flashcard.exampleTranslation.trim()) {
-      back += `<div style="font-size: 13px; color: #888; margin: -4px 0 10px 0; padding: 0 8px;">${this.escapeHtml(flashcard.exampleTranslation)}</div>`;
+    const image = imageMap?.get(targetWord);
+    if (image) {
+      html += `<div style="margin: 16px auto; max-width: 320px;"><img src="${image.filename}" style="width: 100%; border-radius: 12px; box-shadow: 0 2px 12px rgba(0,0,0,0.08);"></div>`;
     }
 
     if (flashcard.mnemonic && flashcard.mnemonic.trim()) {
-      back += `<div style="font-size: 13px; color: #4CAF50; margin-top: 10px; padding: 8px; background-color: #e8f5e8; border-radius: 4px;"><strong>💡 Remember:</strong> ${this.escapeHtml(flashcard.mnemonic)}</div>`;
+      html += `<div style="background: linear-gradient(135deg, #f0fdf4 0%, #ecfdf5 100%); border-radius: 12px; padding: 14px 18px; text-align: left; border-left: 4px solid #22c55e;">`;
+      html += `<div style="font-size: 12px; font-weight: 700; color: #16a34a; text-transform: uppercase; letter-spacing: 0.5px; margin-bottom: 6px;">Remember</div>`;
+      html += `<div style="font-size: 14px; color: #334155; line-height: 1.5;">${this.escapeHtml(flashcard.mnemonic)}</div>`;
+      html += `</div>`;
     }
 
-    return back;
+    html += `</div>`;
+    return html;
   }
 
-  /**
-   * Escape HTML characters to prevent XSS and formatting issues
-   */
   private escapeHtml(text: string): string {
     return text
       .replace(/&/g, '&amp;')
@@ -94,9 +161,6 @@ export class AnkiExporter {
       .replace(/'/g, '&#39;');
   }
 
-  /**
-   * Count valid flashcards (ones that can be exported)
-   */
   countValidFlashcards(flashcards: Flashcard[]): number {
     return flashcards.filter((flashcard) => {
       const targetWord =
diff --git a/backend/src/helpers/audio-utils.ts b/backend/src/helpers/audio-utils.ts
index b12c336..115fd01 100644
--- a/backend/src/helpers/audio-utils.ts
+++ b/backend/src/helpers/audio-utils.ts
@@ -2,6 +2,50 @@
  * Audio utility functions for format conversion
  */
 
+/**
+ * Encode raw PCM16 samples as a WAV file buffer.
+ * Returns a complete .wav file that can be written to disk or embedded in an Anki package.
+ */
+export function encodeWav(
+  pcm16: Int16Array,
+  sampleRate: number,
+  numChannels: number = 1
+): Buffer {
+  const bytesPerSample = 2;
+  const dataByteLength = pcm16.length * bytesPerSample;
+  const headerSize = 44;
+  const buffer = Buffer.alloc(headerSize + dataByteLength);
+
+  // RIFF header
+  buffer.write('RIFF', 0);
+  buffer.writeUInt32LE(36 + dataByteLength, 4);
+  buffer.write('WAVE', 8);
+
+  // fmt sub-chunk
+  buffer.write('fmt ', 12);
+  buffer.writeUInt32LE(16, 16); // sub-chunk size
+  buffer.writeUInt16LE(1, 20); // PCM format
+  buffer.writeUInt16LE(numChannels, 22);
+  buffer.writeUInt32LE(sampleRate, 24);
+  buffer.writeUInt32LE(sampleRate * numChannels * bytesPerSample, 28); // byte rate
+  buffer.writeUInt16LE(numChannels * bytesPerSample, 32); // block align
+  buffer.writeUInt16LE(bytesPerSample * 8, 34); // bits per sample
+
+  // data sub-chunk
+  buffer.write('data', 36);
+  buffer.writeUInt32LE(dataByteLength, 40);
+
+  // PCM samples (little-endian Int16, which is how Int16Array is stored on LE systems)
+  const pcm16Bytes = Buffer.from(
+    pcm16.buffer,
+    pcm16.byteOffset,
+    pcm16.byteLength
+  );
+  pcm16Bytes.copy(buffer, headerSize);
+
+  return buffer;
+}
+
 /**
  * Convert Float32Array audio data to Int16Array (PCM16)
  */
diff --git a/backend/src/helpers/image-generator.ts b/backend/src/helpers/image-generator.ts
new file mode 100644
index 0000000..d92b7ca
--- /dev/null
+++ b/backend/src/helpers/image-generator.ts
@@ -0,0 +1,133 @@
+/**
+ * Image Generator using Replicate API (FLUX Schnell model)
+ *
+ * Generates illustrative images for flashcard words to aid visual memory.
+ */
+
+import { serverLogger as logger } from '../utils/logger.js';
+
+export interface GeneratedImage {
+  filename: string;
+  buffer: Buffer;
+}
+
+/**
+ * Generate an image for a single word using Replicate's FLUX Schnell model.
+ * Returns the image as a buffer, or null on failure.
+ */
+async function generateImage(
+  word: string,
+  englishWord: string,
+  index: number
+): Promise<GeneratedImage | null> {
+  const apiToken = process.env.REPLICATE_API_TOKEN;
+  if (!apiToken) {
+    logger.warn('REPLICATE_API_TOKEN not set, skipping image generation');
+    return null;
+  }
+
+  const prompt = `a memorable, colorful, hand-drawn image of ${englishWord}`;
+
+  const response = await fetch(
+    'https://api.replicate.com/v1/models/black-forest-labs/flux-schnell/predictions',
+    {
+      method: 'POST',
+      headers: {
+        Authorization: `Bearer ${apiToken}`,
+        'Content-Type': 'application/json',
+        Prefer: 'wait',
+      },
+      body: JSON.stringify({
+        input: {
+          prompt,
+          go_fast: true,
+          megapixels: '1',
+          num_outputs: 1,
+          aspect_ratio: '1:1',
+          output_format: 'webp',
+          output_quality: 80,
+          num_inference_steps: 4,
+        },
+      }),
+    }
+  );
+
+  if (!response.ok) {
+    logger.warn(
+      { status: response.status, word },
+      'replicate_api_request_failed'
+    );
+    return null;
+  }
+
+  const data = (await response.json()) as {
+    status: string;
+    output?: string[];
+  };
+
+  if (data.status !== 'succeeded' || !data.output?.[0]) {
+    logger.warn({ word, status: data.status }, 'replicate_prediction_failed');
+    return null;
+  }
+
+  const imageUrl = data.output[0];
+  const imageResponse = await fetch(imageUrl);
+  if (!imageResponse.ok) {
+    logger.warn({ word, imageUrl }, 'replicate_image_download_failed');
+    return null;
+  }
+
+  const arrayBuffer = await imageResponse.arrayBuffer();
+  const buffer = Buffer.from(arrayBuffer);
+
+  const sanitized = word
+    .trim()
+    .toLowerCase()
+    .replace(
+      /[^a-z0-9\u00C0-\u024F\u1E00-\u1EFF\u3000-\u9FFF\uAC00-\uD7AF]/g,
+      '_'
+    )
+    .replace(/_+/g, '_')
+    .replace(/^_|_$/g, '');
+
+  const filename = `img_${sanitized}_${index}.webp`;
+
+  return { filename, buffer };
+}
+
+/**
+ * Generate images for multiple words in sequence.
+ * @param wordToEnglish - Map from target-language word to its English translation (used as the image prompt).
+ * Returns a map from the original target word to the image filename and buffer.
+ */
+export async function generateBatchImages(
+  wordToEnglish: Map<string, string>
+): Promise<Map<string, GeneratedImage>> {
+  const results = new Map<string, GeneratedImage>();
+  const apiToken = process.env.REPLICATE_API_TOKEN;
+
+  if (!apiToken) {
+    logger.info(
+      'REPLICATE_API_TOKEN not configured, skipping image generation'
+    );
+    return results;
+  }
+
+  let i = 0;
+  for (const [word, english] of wordToEnglish) {
+    try {
+      const image = await generateImage(word, english, i);
+      if (image) {
+        results.set(word, image);
+      }
+    } catch (error) {
+      logger.warn(
+        { word, err: error },
+        'image_generation_failed_for_word'
+      );
+    }
+    i++;
+  }
+
+  return results;
+}
diff --git a/backend/src/helpers/tts-audio-generator.ts b/backend/src/helpers/tts-audio-generator.ts
new file mode 100644
index 0000000..8bdfd2f
--- /dev/null
+++ b/backend/src/helpers/tts-audio-generator.ts
@@ -0,0 +1,119 @@
+/**
+ * Batch TTS Audio Generator
+ *
+ * Generates WAV audio buffers for a list of words using the SimpleTTSGraph.
+ * Used by the Anki exporter to embed pronunciation audio into .apkg files.
+ */
+
+import { GraphTypes } from '@inworld/runtime/graph';
+import { getSimpleTTSGraph } from '../graphs/simple-tts-graph.js';
+import { float32ToPCM16, encodeWav } from './audio-utils.js';
+import { serverConfig } from '../config/server.js';
+import { serverLogger as logger } from '../utils/logger.js';
+
+export interface GeneratedAudio {
+  filename: string;
+  buffer: Buffer;
+}
+
+/**
+ * Generate a WAV audio buffer for a single word via TTS.
+ * Collects all streamed audio chunks into one contiguous buffer.
+ */
+export async function generateTTSAudio(
+  text: string,
+  languageCode: string
+): Promise<Buffer | null> {
+  const graph = getSimpleTTSGraph(languageCode);
+  const executionResult = await graph.start({ text: text.trim() });
+
+  const rawChunks: Buffer[] = [];
+
+  for await (const res of executionResult.outputStream) {
+    if ('processResponse' in res) {
+      const resultWithProcess = res as {
+        processResponse: (
+          handlers: Record<string, (data: unknown) => Promise<void> | void>
+        ) => Promise<void>;
+      };
+      await resultWithProcess.processResponse({
+        TTSOutputStream: async (ttsData: unknown) => {
+          const ttsStream = ttsData as GraphTypes.TTSOutputStream;
+          for await (const chunk of ttsStream) {
+            if (chunk.audio?.data) {
+              const audioData = chunk.audio.data;
+              if (typeof audioData === 'string') {
+                rawChunks.push(Buffer.from(audioData, 'base64'));
+              } else if (Array.isArray(audioData)) {
+                rawChunks.push(Buffer.from(audioData));
+              } else {
+                rawChunks.push(
+                  Buffer.from(
+                    audioData.buffer,
+                    audioData.byteOffset,
+                    audioData.byteLength
+                  )
+                );
+              }
+            }
+          }
+        },
+      });
+    }
+  }
+
+  if (rawChunks.length === 0) {
+    return null;
+  }
+
+  // Inworld TTS returns raw bytes that represent Float32 PCM samples
+  const combined = Buffer.concat(rawChunks);
+  const float32 = new Float32Array(
+    combined.buffer,
+    combined.byteOffset,
+    combined.byteLength / 4
+  );
+
+  const pcm16 = float32ToPCM16(float32);
+  const sampleRate = serverConfig.audio.ttsSampleRate;
+  return encodeWav(pcm16, sampleRate);
+}
+
+/**
+ * Generate TTS audio for multiple words in sequence.
+ * Returns a map from the original word to the WAV filename and buffer.
+ */
+export async function generateBatchTTSAudio(
+  words: string[],
+  languageCode: string,
+  onProgress?: (completed: number, total: number) => void
+): Promise<Map<string, GeneratedAudio>> {
+  const results = new Map<string, GeneratedAudio>();
+  const total = words.length;
+
+  for (let i = 0; i < words.length; i++) {
+    const word = words[i];
+    try {
+      const wavBuffer = await generateTTSAudio(word, languageCode);
+      if (wavBuffer) {
+        const sanitized = word
+          .trim()
+          .toLowerCase()
+          .replace(/[^a-z0-9\u00C0-\u024F\u1E00-\u1EFF\u3000-\u9FFF\uAC00-\uD7AF]/g, '_')
+          .replace(/_+/g, '_')
+          .replace(/^_|_$/g, '');
+        const filename = `tts_${sanitized}_${i}.wav`;
+        results.set(word, { filename, buffer: wavBuffer });
+      }
+      onProgress?.(i + 1, total);
+    } catch (error) {
+      logger.warn(
+        { word, languageCode, err: error },
+        'tts_batch_generation_failed_for_word'
+      );
+      onProgress?.(i + 1, total);
+    }
+  }
+
+  return results;
+}
diff --git a/backend/src/services/api-routes.ts b/backend/src/services/api-routes.ts
index dccac71..75e5d9f 100644
--- a/backend/src/services/api-routes.ts
+++ b/backend/src/services/api-routes.ts
@@ -6,6 +6,9 @@
 
 import { Router } from 'express';
 import { AnkiExporter } from '../helpers/anki-exporter.js';
+import { generateBatchTTSAudio } from '../helpers/tts-audio-generator.js';
+import { generateBatchImages } from '../helpers/image-generator.js';
+import { Flashcard } from '../helpers/flashcard-processor.js';
 import {
   getLanguageOptions,
   DEFAULT_LANGUAGE_CODE,
@@ -18,7 +21,11 @@ export const apiRouter = Router();
 // ANKI export endpoint
 apiRouter.post('/export-anki', async (req, res) => {
   try {
-    const { flashcards, deckName, languageCode: _languageCode } = req.body;
+    const {
+      flashcards,
+      deckName,
+      languageCode,
+    } = req.body;
 
     if (!flashcards || !Array.isArray(flashcards) || flashcards.length === 0) {
       res.status(400).json({ error: 'No flashcards provided' });
@@ -33,10 +40,52 @@ apiRouter.post('/export-anki', async (req, res) => {
       return;
     }
 
+    const lang = languageCode || DEFAULT_LANGUAGE_CODE;
+    const texts: string[] = [];
+    const wordToEnglish = new Map<string, string>();
+
+    for (const fc of flashcards as Flashcard[]) {
+      const word =
+        (fc.targetWord || (fc as { spanish?: string }).spanish || '').trim();
+      if (word) {
+        texts.push(word);
+        if (fc.english) {
+          wordToEnglish.set(word, fc.english.trim());
+        }
+      }
+
+      const sentence = (fc.example || '').trim();
+      if (sentence) texts.push(sentence);
+    }
+
+    const uniqueTexts = [...new Set(texts)];
+
+    logger.info(
+      { textCount: uniqueTexts.length, imageCount: wordToEnglish.size, languageCode: lang },
+      'anki_export_generating_media'
+    );
+
+    const [audioMap, imageMap] = await Promise.all([
+      generateBatchTTSAudio(uniqueTexts, lang),
+      generateBatchImages(wordToEnglish),
+    ]);
+
+    logger.info(
+      {
+        audioCount: audioMap.size,
+        imageCount: imageMap.size,
+        requestedTexts: uniqueTexts.length,
+        requestedImages: wordToEnglish.size,
+      },
+      'anki_export_media_generation_complete'
+    );
+
     const defaultDeckName = `Inworld Language Tutor Spanish Cards`;
     const apkgBuffer = await exporter.exportFlashcards(
       flashcards,
-      deckName || defaultDeckName
+      deckName || defaultDeckName,
+      audioMap,
+      imageMap
     );
 
     res.setHeader('Content-Type', 'application/octet-stream');
diff --git a/frontend/src/App.tsx b/frontend/src/App.tsx
index dd39db8..b77969b 100644
--- a/frontend/src/App.tsx
+++ b/frontend/src/App.tsx
@@ -45,7 +45,7 @@ function AppContent() {
           target="_blank"
           rel="noopener noreferrer"
           className="fab-button fab-labeled"
-          aria-label="Deploy on Render"
+          aria-label="Render"
         >
           <svg
             width="16"
@@ -61,7 +61,7 @@ function AppContent() {
             <path d="M2 17l10 5 10-5" />
             <path d="M2 12l10 5 10-5" />
           </svg>
-          <span>Deploy on Render</span>
+          <span>Render</span>
         </a>
         <a
           href="https://github.com/inworld-ai/language-learning-node"
diff --git a/frontend/src/components/FlashcardsSection.tsx b/frontend/src/components/FlashcardsSection.tsx
index 8ac69d2..aa67c91 100644
--- a/frontend/src/components/FlashcardsSection.tsx
+++ b/frontend/src/components/FlashcardsSection.tsx
@@ -3,7 +3,6 @@ import { useApp } from '../context/AppContext';
 import { Flashcard } from './Flashcard';
 import type { Flashcard as FlashcardType } from '../types';
 
-// Helper for API URL for Cloud Run deployment
 const getApiUrl = (path: string): string => {
   const backendUrl = import.meta.env.VITE_BACKEND_URL;
   return backendUrl ? `${backendUrl}${path}` : path;
@@ -100,7 +99,6 @@ export function FlashcardsSection() {
     }
   }, [flashcards, currentLanguage]);
 
-  // Sort flashcards by timestamp (most recent first)
   const sortedFlashcards = [...flashcards].sort((a, b) => {
     const timeA = new Date(a.timestamp || 0).getTime();
     const timeB = new Date(b.timestamp || 0).getTime();
@@ -114,20 +112,20 @@ export function FlashcardsSection() {
     <section className="flashcards-section">
       <div className="section-header">
         <h2>Flashcards</h2>
-        <span
-          id="cardCount"
-          className={`card-count ${canExport ? 'exportable' : ''}`}
-          onClick={canExport && !isExporting ? exportToAnki : undefined}
-          style={{
-            cursor: canExport && !isExporting ? 'pointer' : 'default',
-          }}
-        >
-          {isExporting
-            ? 'Exporting...'
-            : canExport
+        <div style={{ display: 'flex', alignItems: 'center', gap: '12px' }}>
+          <span
+            id="cardCount"
+            className={`card-count ${canExport ? 'exportable' : ''}`}
+            onClick={canExport && !isExporting ? exportToAnki : undefined}
+            style={{
+              cursor: canExport && !isExporting ? 'pointer' : 'default',
+            }}
+          >
+            {canExport
               ? `Export ${cardCount} card${cardCount !== 1 ? 's' : ''} to Anki`
               : `${cardCount} card${cardCount !== 1 ? 's' : ''}`}
-        </span>
+          </span>
+        </div>
       </div>
       <div className="flashcards-container">
         <div className="flashcards-grid" id="flashcardsGrid">
@@ -160,6 +158,18 @@ export function FlashcardsSection() {
           )}
         </div>
       </div>
+
+      {isExporting && (
+        <div className="export-modal-overlay">
+          <div className="export-modal">
+            <div className="export-modal-spinner" />
+            <div className="export-modal-text">Exporting flashcards</div>
+            <div className="export-modal-subtext">
+              Generating audio &amp; images for {cardCount} card{cardCount !== 1 ? 's' : ''}...
+            </div>
+          </div>
+        </div>
+      )}
     </section>
   );
 }
diff --git a/frontend/src/components/Header.tsx b/frontend/src/components/Header.tsx
index 44ce6fa..73c5d85 100644
--- a/frontend/src/components/Header.tsx
+++ b/frontend/src/components/Header.tsx
@@ -214,13 +214,17 @@ export function Header() {
           )}
 
           {/* Logo */}
-          <div>
+          <a
+            href="https://www.inworld.ai"
+            target="_blank"
+            rel="noopener noreferrer"
+          >
             <img
               src="/favicon.svg"
               alt="Inworld Language Tutor"
               className="logo-icon"
             />
-          </div>
+          </a>
         </div>
       </div>
     </header>
diff --git a/frontend/src/components/Sidebar.tsx b/frontend/src/components/Sidebar.tsx
index f13b32f..fa222ac 100644
--- a/frontend/src/components/Sidebar.tsx
+++ b/frontend/src/components/Sidebar.tsx
@@ -277,7 +277,7 @@ export function Sidebar() {
               <path d="M2 17l10 5 10-5" />
               <path d="M2 12l10 5 10-5" />
             </svg>
-            <span>Deploy on Render</span>
+            <span>Render</span>
           </a>
           <a
             href="https://github.com/inworld-ai/language-learning-node"
diff --git a/frontend/src/styles/main.css b/frontend/src/styles/main.css
index 8adc900..dcd3116 100644
--- a/frontend/src/styles/main.css
+++ b/frontend/src/styles/main.css
@@ -1711,6 +1711,71 @@ body {
     text-align: center;
 }
 
+/* Export Modal */
+.export-modal-overlay {
+    position: fixed;
+    inset: 0;
+    background: rgba(0, 0, 0, 0.45);
+    backdrop-filter: blur(4px);
+    display: flex;
+    align-items: center;
+    justify-content: center;
+    z-index: 2000;
+    animation: modal-fade-in 0.2s ease-out;
+}
+
+@keyframes modal-fade-in {
+    from {
+        opacity: 0;
+    }
+    to {
+        opacity: 1;
+    }
+}
+
+.export-modal {
+    background: #ffffff;
+    border-radius: 16px;
+    padding: 40px 48px;
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    gap: 16px;
+    box-shadow: 0 20px 60px rgba(0, 0, 0, 0.15), 0 0 0 1px rgba(0, 0, 0, 0.05);
+    animation: modal-scale-in 0.25s ease-out;
+}
+
+@keyframes modal-scale-in {
+    from {
+        opacity: 0;
+        transform: scale(0.95);
+    }
+    to {
+        opacity: 1;
+        transform: scale(1);
+    }
+}
+
+.export-modal-spinner {
+    width: 40px;
+    height: 40px;
+    border: 3px solid #e5e5e5;
+    border-top-color: #1a1a1a;
+    border-radius: 50%;
+    animation: spin 0.8s linear infinite;
+}
+
+.export-modal-text {
+    font-size: 17px;
+    font-weight: 600;
+    color: #1a1a1a;
+}
+
+.export-modal-subtext {
+    font-size: 14px;
+    color: #6b7280;
+}
+
 /* Responsive */
 @media (max-width: 768px) {
     .header-logo {

From 036e68e527991ddcf0878cd185ccfad80b8fb3d1 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Mon, 23 Feb 2026 09:53:43 -0800
Subject: [PATCH 14/16] fix: improve json parsing for flashcards

---
 backend/package-lock.json                  | 10 +++++
 backend/package.json                       |  1 +
 backend/src/graphs/flashcard-graph.ts      | 47 +++++++++++++++-------
 backend/src/helpers/flashcard-processor.ts | 13 +++++-
 backend/src/services/websocket-handler.ts  |  5 +++
 5 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/backend/package-lock.json b/backend/package-lock.json
index 0672fc4..1d24a22 100644
--- a/backend/package-lock.json
+++ b/backend/package-lock.json
@@ -15,6 +15,7 @@
         "cors": "^2.8.5",
         "dotenv": "^17.2.1",
         "express": "^4.22.1",
+        "jsonrepair": "^3.13.2",
         "pino": "^10.1.0",
         "uuid": "^11.1.0",
         "ws": "^8.18.0"
@@ -4545,6 +4546,15 @@
       "dev": true,
       "license": "MIT"
     },
+    "node_modules/jsonrepair": {
+      "version": "3.13.2",
+      "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.13.2.tgz",
+      "integrity": "sha512-Leuly0nbM4R+S5SVJk3VHfw1oxnlEK9KygdZvfUtEtTawNDyzB4qa1xWTmFt1aeoA7sXZkVTRuIixJ8bAvqVUg==",
+      "license": "ISC",
+      "bin": {
+        "jsonrepair": "bin/cli.js"
+      }
+    },
     "node_modules/jszip": {
       "version": "3.10.1",
       "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz",
diff --git a/backend/package.json b/backend/package.json
index a2f5f55..6b19171 100644
--- a/backend/package.json
+++ b/backend/package.json
@@ -58,6 +58,7 @@
     "cors": "^2.8.5",
     "dotenv": "^17.2.1",
     "express": "^4.22.1",
+    "jsonrepair": "^3.13.2",
     "pino": "^10.1.0",
     "uuid": "^11.1.0",
     "ws": "^8.18.0"
diff --git a/backend/src/graphs/flashcard-graph.ts b/backend/src/graphs/flashcard-graph.ts
index cec41c2..37142e4 100644
--- a/backend/src/graphs/flashcard-graph.ts
+++ b/backend/src/graphs/flashcard-graph.ts
@@ -14,6 +14,7 @@ import { v4 } from 'uuid';
 import { Flashcard } from '../helpers/flashcard-processor.js';
 import { llmConfig } from '../config/llm.js';
 import { flashcardLogger as logger } from '../utils/logger.js';
+import { jsonrepair } from 'jsonrepair';
 
 class FlashcardPromptBuilderNode extends CustomNode {
   async process(
@@ -38,22 +39,40 @@ class TextToChatRequestNode extends CustomNode {
 
 class FlashcardParserNode extends CustomNode {
   process(_context: ProcessContext, input: GraphTypes.Content) {
-    try {
-      const content =
-        (input &&
-          typeof input === 'object' &&
-          'content' in input &&
-          (input as { content?: unknown }).content) ||
-        input;
-      const textContent =
-        typeof content === 'string' ? content : JSON.stringify(content);
+    const content =
+      (input &&
+        typeof input === 'object' &&
+        'content' in input &&
+        (input as { content?: unknown }).content) ||
+      input;
+    const textContent =
+      typeof content === 'string' ? content : JSON.stringify(content);
 
-      const jsonMatch = textContent.match(/\{[\s\S]*\}/);
-      if (jsonMatch) {
-        const parsed = JSON.parse(jsonMatch[0]);
+    const jsonMatch = textContent.match(/\{[\s\S]*\}/);
+    if (jsonMatch) {
+      const raw = jsonMatch[0];
+      let parsed: Record<string, string> | undefined;
+
+      try {
+        parsed = JSON.parse(raw);
+      } catch {
+        try {
+          parsed = JSON.parse(jsonrepair(raw));
+          logger.warn(
+            { raw: raw.slice(0, 500) },
+            'flashcard_json_repaired'
+          );
+        } catch (repairError) {
+          logger.error(
+            { raw: raw.slice(0, 500), err: repairError },
+            'failed_to_parse_flashcard_json'
+          );
+        }
+      }
+
+      if (parsed) {
         const result: Record<string, unknown> = {
           id: v4(),
-          // Support both new 'targetWord' format and legacy 'spanish' format
           targetWord: parsed.targetWord ?? parsed.spanish ?? '',
           english: parsed.english ?? '',
           example: parsed.example ?? '',
@@ -65,8 +84,6 @@ class FlashcardParserNode extends CustomNode {
         if (parsed.examplePinyin) result.examplePinyin = parsed.examplePinyin;
         return result;
       }
-    } catch (error) {
-      logger.error({ err: error }, 'failed_to_parse_flashcard_json');
     }
 
     return {
diff --git a/backend/src/helpers/flashcard-processor.ts b/backend/src/helpers/flashcard-processor.ts
index f2b1149..a06d43e 100644
--- a/backend/src/helpers/flashcard-processor.ts
+++ b/backend/src/helpers/flashcard-processor.ts
@@ -91,12 +91,17 @@ export class FlashcardProcessor {
     try {
       const flashcards = await Promise.all(promises);
 
-      // Filter out any failed generations and duplicates
       const validFlashcards = flashcards.filter(
         (card) => card.targetWord && card.english
       );
 
-      // Add to existing flashcards to track for future duplicates
+      if (validFlashcards.length === 0 && flashcards.length > 0) {
+        logger.warn(
+          { generated: flashcards.length },
+          'all_flashcards_filtered_out'
+        );
+      }
+
       this.existingFlashcards.push(...validFlashcards);
 
       return validFlashcards;
@@ -159,6 +164,10 @@ export class FlashcardProcessor {
       );
 
       if (isDuplicate) {
+        logger.info(
+          { word: flashcard.targetWord },
+          'flashcard_duplicate_skipped'
+        );
         return {
           id: v4(),
           targetWord: '',
diff --git a/backend/src/services/websocket-handler.ts b/backend/src/services/websocket-handler.ts
index df404c0..7c3f3de 100644
--- a/backend/src/services/websocket-handler.ts
+++ b/backend/src/services/websocket-handler.ts
@@ -112,6 +112,11 @@ export function setupWebSocketHandlers(wss: WebSocketServer): void {
                 conversationId: conversationId || null,
               })
             );
+          } else {
+            logger.info(
+              { connectionId, languageCode },
+              'flashcard_generation_returned_empty'
+            );
           }
         } catch (error) {
           if (!isShuttingDown()) {

From 5b363b414a8f37b501517da668d5a7625b6c2baa Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Mon, 23 Feb 2026 10:49:34 -0800
Subject: [PATCH 15/16] linter fixes

---
 backend/src/config/server.ts                  |  6 ++--
 backend/src/graphs/conversation-graph.ts      |  5 +--
 backend/src/graphs/flashcard-graph.ts         |  8 ++---
 .../src/graphs/nodes/soniox-stt-ws-node.ts    | 35 ++++++++++---------
 backend/src/helpers/anki-exporter.ts          |  6 +++-
 backend/src/helpers/image-generator.ts        |  5 +--
 backend/src/helpers/tts-audio-generator.ts    |  5 ++-
 backend/src/services/api-routes.ts            | 19 +++++-----
 backend/src/services/websocket-handler.ts     | 12 ++++---
 frontend/src/components/ChatSection.tsx       | 30 +++++++++-------
 frontend/src/components/Flashcard.tsx         | 14 ++++++--
 frontend/src/components/FlashcardsSection.tsx |  7 ++--
 frontend/src/services/AudioHandler.ts         |  4 ++-
 13 files changed, 87 insertions(+), 69 deletions(-)

diff --git a/backend/src/config/server.ts b/backend/src/config/server.ts
index f296e0f..3ad538e 100644
--- a/backend/src/config/server.ts
+++ b/backend/src/config/server.ts
@@ -118,8 +118,7 @@ export const serverConfig = {
    */
   soniox: {
     /** Endpoint detection eagerness level (reuses the same 'low'|'medium'|'high' scale) */
-    eagerness: (process.env.SONIOX_EAGERNESS ||
-      'high') as AssemblyAIEagerness,
+    eagerness: (process.env.SONIOX_EAGERNESS || 'high') as AssemblyAIEagerness,
   },
 
   /**
@@ -162,6 +161,7 @@ export function getSttProvider(): STTProvider {
  * Reads SONIOX_EAGERNESS from process.env at call time (after dotenv loads).
  */
 export function getSonioxSettings(): SonioxEndpointSettings {
-  const eagerness = (process.env.SONIOX_EAGERNESS || 'high') as AssemblyAIEagerness;
+  const eagerness = (process.env.SONIOX_EAGERNESS ||
+    'high') as AssemblyAIEagerness;
   return sonioxPresets[eagerness];
 }
diff --git a/backend/src/graphs/conversation-graph.ts b/backend/src/graphs/conversation-graph.ts
index fdd627d..20e1949 100644
--- a/backend/src/graphs/conversation-graph.ts
+++ b/backend/src/graphs/conversation-graph.ts
@@ -63,10 +63,7 @@ export class ConversationGraphWrapper {
   graph: Graph;
   sttNode: STTNode;
 
-  private constructor(params: {
-    graph: Graph;
-    sttNode: STTNode;
-  }) {
+  private constructor(params: { graph: Graph; sttNode: STTNode }) {
     this.graph = params.graph;
     this.sttNode = params.sttNode;
   }
diff --git a/backend/src/graphs/flashcard-graph.ts b/backend/src/graphs/flashcard-graph.ts
index 37142e4..8fa8641 100644
--- a/backend/src/graphs/flashcard-graph.ts
+++ b/backend/src/graphs/flashcard-graph.ts
@@ -58,10 +58,7 @@ class FlashcardParserNode extends CustomNode {
       } catch {
         try {
           parsed = JSON.parse(jsonrepair(raw));
-          logger.warn(
-            { raw: raw.slice(0, 500) },
-            'flashcard_json_repaired'
-          );
+          logger.warn({ raw: raw.slice(0, 500) }, 'flashcard_json_repaired');
         } catch (repairError) {
           logger.error(
             { raw: raw.slice(0, 500), err: repairError },
@@ -79,7 +76,8 @@ class FlashcardParserNode extends CustomNode {
           mnemonic: parsed.mnemonic ?? '',
           timestamp: new Date().toISOString(),
         };
-        if (parsed.exampleTranslation) result.exampleTranslation = parsed.exampleTranslation;
+        if (parsed.exampleTranslation)
+          result.exampleTranslation = parsed.exampleTranslation;
         if (parsed.pinyin) result.pinyin = parsed.pinyin;
         if (parsed.examplePinyin) result.examplePinyin = parsed.examplePinyin;
         return result;
diff --git a/backend/src/graphs/nodes/soniox-stt-ws-node.ts b/backend/src/graphs/nodes/soniox-stt-ws-node.ts
index afe231c..eb2591c 100644
--- a/backend/src/graphs/nodes/soniox-stt-ws-node.ts
+++ b/backend/src/graphs/nodes/soniox-stt-ws-node.ts
@@ -54,11 +54,7 @@ class SonioxSession {
   ) {}
 
   public async ensureConnection(): Promise<void> {
-    if (
-      !this.ws ||
-      !this.wsReady ||
-      this.ws.readyState !== WebSocket.OPEN
-    ) {
+    if (!this.ws || !this.wsReady || this.ws.readyState !== WebSocket.OPEN) {
       this.closeWebSocket();
       this.initializeWebSocket();
     }
@@ -265,10 +261,7 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
   private readonly TURN_COMPLETION_TIMEOUT_MS = 2000;
   private readonly MAX_TRANSCRIPTION_DURATION_MS = 40000;
 
-  constructor(props: {
-    id?: string;
-    config: SonioxSTTWebSocketNodeConfig;
-  }) {
+  constructor(props: { id?: string; config: SonioxSTTWebSocketNodeConfig }) {
     const { config, ...nodeProps } = props;
 
     if (!config.apiKey) {
@@ -361,13 +354,12 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
     let textContent: string | undefined;
 
     // Soniox token accumulation
-    let finalTokenTexts: string[] = [];
+    const finalTokenTexts: string[] = [];
 
     // Derive per-session language hints from the connection's active language
     const targetLang = connection.state.languageCode || 'es';
-    const sessionLanguageHints = targetLang === 'en'
-      ? ['en']
-      : ['en', targetLang];
+    const sessionLanguageHints =
+      targetLang === 'en' ? ['en'] : ['en', targetLang];
 
     // Get or create session
     let session = this.sessions.get(sessionId);
@@ -440,8 +432,12 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
         }
 
         // Trigger speech detected on first meaningful text
-        if (!speechDetected && (nonFinalTexts.length > 0 || finalTokenTexts.length > 0)) {
-          const hasText = nonFinalTexts.some((t) => t.trim().length > 0) ||
+        if (
+          !speechDetected &&
+          (nonFinalTexts.length > 0 || finalTokenTexts.length > 0)
+        ) {
+          const hasText =
+            nonFinalTexts.some((t) => t.trim().length > 0) ||
             finalTokenTexts.some((t) => t.trim().length > 0);
           if (hasText) {
             speechDetected = true;
@@ -454,7 +450,9 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
 
         // Send partial transcript from non-final tokens
         if (nonFinalTexts.length > 0) {
-          const partialText = [...finalTokenTexts, ...nonFinalTexts].join('').trim();
+          const partialText = [...finalTokenTexts, ...nonFinalTexts]
+            .join('')
+            .trim();
           if (partialText) {
             this.sendPartialTranscript(
               sessionId,
@@ -481,7 +479,10 @@ export class SonioxSTTWebSocketNode extends CustomNode implements STTNode {
             connection.pendingTranscript = undefined;
           } else {
             logger.debug(
-              { iteration, transcriptSnippet: finalTranscript.substring(0, 50) },
+              {
+                iteration,
+                transcriptSnippet: finalTranscript.substring(0, 50),
+              },
               'endpoint_detected'
             );
           }
diff --git a/backend/src/helpers/anki-exporter.ts b/backend/src/helpers/anki-exporter.ts
index 15b62bf..d4d2c7b 100644
--- a/backend/src/helpers/anki-exporter.ts
+++ b/backend/src/helpers/anki-exporter.ts
@@ -45,7 +45,11 @@ export class AnkiExporter {
         return;
       }
 
-      const front = this.formatCardFront(flashcard, targetWord.trim(), audioMap);
+      const front = this.formatCardFront(
+        flashcard,
+        targetWord.trim(),
+        audioMap
+      );
       const back = this.formatCardBack(flashcard, audioMap, imageMap);
 
       const tags = ['inworld-language-tutor'];
diff --git a/backend/src/helpers/image-generator.ts b/backend/src/helpers/image-generator.ts
index d92b7ca..5e397ff 100644
--- a/backend/src/helpers/image-generator.ts
+++ b/backend/src/helpers/image-generator.ts
@@ -121,10 +121,7 @@ export async function generateBatchImages(
         results.set(word, image);
       }
     } catch (error) {
-      logger.warn(
-        { word, err: error },
-        'image_generation_failed_for_word'
-      );
+      logger.warn({ word, err: error }, 'image_generation_failed_for_word');
     }
     i++;
   }
diff --git a/backend/src/helpers/tts-audio-generator.ts b/backend/src/helpers/tts-audio-generator.ts
index 8bdfd2f..2a16055 100644
--- a/backend/src/helpers/tts-audio-generator.ts
+++ b/backend/src/helpers/tts-audio-generator.ts
@@ -99,7 +99,10 @@ export async function generateBatchTTSAudio(
         const sanitized = word
           .trim()
           .toLowerCase()
-          .replace(/[^a-z0-9\u00C0-\u024F\u1E00-\u1EFF\u3000-\u9FFF\uAC00-\uD7AF]/g, '_')
+          .replace(
+            /[^a-z0-9\u00C0-\u024F\u1E00-\u1EFF\u3000-\u9FFF\uAC00-\uD7AF]/g,
+            '_'
+          )
           .replace(/_+/g, '_')
           .replace(/^_|_$/g, '');
         const filename = `tts_${sanitized}_${i}.wav`;
diff --git a/backend/src/services/api-routes.ts b/backend/src/services/api-routes.ts
index 75e5d9f..af2abb9 100644
--- a/backend/src/services/api-routes.ts
+++ b/backend/src/services/api-routes.ts
@@ -21,11 +21,7 @@ export const apiRouter = Router();
 // ANKI export endpoint
 apiRouter.post('/export-anki', async (req, res) => {
   try {
-    const {
-      flashcards,
-      deckName,
-      languageCode,
-    } = req.body;
+    const { flashcards, deckName, languageCode } = req.body;
 
     if (!flashcards || !Array.isArray(flashcards) || flashcards.length === 0) {
       res.status(400).json({ error: 'No flashcards provided' });
@@ -45,8 +41,11 @@ apiRouter.post('/export-anki', async (req, res) => {
     const wordToEnglish = new Map<string, string>();
 
     for (const fc of flashcards as Flashcard[]) {
-      const word =
-        (fc.targetWord || (fc as { spanish?: string }).spanish || '').trim();
+      const word = (
+        fc.targetWord ||
+        (fc as { spanish?: string }).spanish ||
+        ''
+      ).trim();
       if (word) {
         texts.push(word);
         if (fc.english) {
@@ -61,7 +60,11 @@ apiRouter.post('/export-anki', async (req, res) => {
     const uniqueTexts = [...new Set(texts)];
 
     logger.info(
-      { textCount: uniqueTexts.length, imageCount: wordToEnglish.size, languageCode: lang },
+      {
+        textCount: uniqueTexts.length,
+        imageCount: wordToEnglish.size,
+        languageCode: lang,
+      },
       'anki_export_generating_media'
     );
 
diff --git a/backend/src/services/websocket-handler.ts b/backend/src/services/websocket-handler.ts
index 7c3f3de..426730b 100644
--- a/backend/src/services/websocket-handler.ts
+++ b/backend/src/services/websocket-handler.ts
@@ -285,7 +285,12 @@ function handleMessage(
     } else if (message.type === 'tts_pronounce_request') {
       handleTTSPronounce(connectionId, ws, message);
     } else if (message.type === 'create_flashcard_request') {
-      handleCreateFlashcardRequest(connectionId, ws, connectionManager, message);
+      handleCreateFlashcardRequest(
+        connectionId,
+        ws,
+        connectionManager,
+        message
+      );
     } else {
       logger.debug(
         { connectionId, messageType: message.type },
@@ -743,10 +748,7 @@ async function handleCreateFlashcardRequest(
     content: m.content,
   }));
 
-  logger.info(
-    { connectionId, word, languageCode },
-    'create_flashcard_request'
-  );
+  logger.info({ connectionId, word, languageCode }, 'create_flashcard_request');
 
   try {
     const attrs = connectionAttributes.get(connectionId) || {};
diff --git a/frontend/src/components/ChatSection.tsx b/frontend/src/components/ChatSection.tsx
index b8b5a19..262a890 100644
--- a/frontend/src/components/ChatSection.tsx
+++ b/frontend/src/components/ChatSection.tsx
@@ -110,17 +110,14 @@ export function ChatSection() {
     [textInput, sendTextMessage]
   );
 
-  const handleContextMenu = useCallback(
-    (e: React.MouseEvent) => {
-      const selection = window.getSelection();
-      const selectedText = selection?.toString().trim();
-      if (!selectedText) return;
+  const handleContextMenu = useCallback((e: React.MouseEvent) => {
+    const selection = window.getSelection();
+    const selectedText = selection?.toString().trim();
+    if (!selectedText) return;
 
-      e.preventDefault();
-      setContextMenu({ x: e.clientX, y: e.clientY, word: selectedText });
-    },
-    []
-  );
+    e.preventDefault();
+    setContextMenu({ x: e.clientX, y: e.clientY, word: selectedText });
+  }, []);
 
   const handleCreateFlashcard = useCallback(() => {
     if (contextMenu?.word) {
@@ -271,14 +268,21 @@ export function ChatSection() {
           style={{ top: contextMenu.y, left: contextMenu.x }}
           onClick={handleCreateFlashcard}
         >
-          <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" strokeWidth="2">
+          <svg
+            viewBox="0 0 24 24"
+            fill="none"
+            stroke="currentColor"
+            strokeWidth="2"
+          >
             <rect x="2" y="3" width="20" height="18" rx="2" />
             <line x1="12" y1="8" x2="12" y2="16" />
             <line x1="8" y1="12" x2="16" y2="12" />
           </svg>
-          Create flashcard for &ldquo;{contextMenu.word.length > 30
+          Create flashcard for &ldquo;
+          {contextMenu.word.length > 30
             ? contextMenu.word.slice(0, 30) + '…'
-            : contextMenu.word}&rdquo;
+            : contextMenu.word}
+          &rdquo;
         </div>
       )}
     </section>
diff --git a/frontend/src/components/Flashcard.tsx b/frontend/src/components/Flashcard.tsx
index d5e4726..f68cbbc 100644
--- a/frontend/src/components/Flashcard.tsx
+++ b/frontend/src/components/Flashcard.tsx
@@ -101,11 +101,17 @@ export function Flashcard({
             className={`flashcard-example ${onPronounceText ? 'pronounceable' : ''} ${isPronouncingSentence ? 'pronouncing' : ''}`}
             onClick={onPronounceText ? handlePronounceExample : undefined}
             role={onPronounceText ? 'button' : undefined}
-            aria-label={onPronounceText ? 'Pronounce example sentence' : undefined}
+            aria-label={
+              onPronounceText ? 'Pronounce example sentence' : undefined
+            }
           >
             <span>{example}</span>
             {onPronounceText && (
-              <svg className="example-speaker-icon" viewBox="0 0 24 24" fill="currentColor">
+              <svg
+                className="example-speaker-icon"
+                viewBox="0 0 24 24"
+                fill="currentColor"
+              >
                 <path d="M3 9v6h4l5 5V4L7 9H3zm13.5 3c0-1.77-1.02-3.29-2.5-4.03v8.05c1.48-.73 2.5-2.25 2.5-4.02z" />
               </svg>
             )}
@@ -114,7 +120,9 @@ export function Flashcard({
             <div className="flashcard-example-pinyin">{examplePinyin}</div>
           )}
           {exampleTranslation && (
-            <div className="flashcard-example-translation">{exampleTranslation}</div>
+            <div className="flashcard-example-translation">
+              {exampleTranslation}
+            </div>
           )}
           {mnemonic && (
             <div className="flashcard-mnemonic">
diff --git a/frontend/src/components/FlashcardsSection.tsx b/frontend/src/components/FlashcardsSection.tsx
index aa67c91..61069aa 100644
--- a/frontend/src/components/FlashcardsSection.tsx
+++ b/frontend/src/components/FlashcardsSection.tsx
@@ -148,9 +148,7 @@ export function FlashcardsSection() {
                   isPronouncing={pronouncingCardId === cardId}
                   isPronouncingSentence={
                     pronouncingCardId ===
-                    (flashcard.example ||
-                      flashcard.example_sentence ||
-                      '')
+                    (flashcard.example || flashcard.example_sentence || '')
                   }
                 />
               );
@@ -165,7 +163,8 @@ export function FlashcardsSection() {
             <div className="export-modal-spinner" />
             <div className="export-modal-text">Exporting flashcards</div>
             <div className="export-modal-subtext">
-              Generating audio &amp; images for {cardCount} card{cardCount !== 1 ? 's' : ''}...
+              Generating audio &amp; images for {cardCount} card
+              {cardCount !== 1 ? 's' : ''}...
             </div>
           </div>
         </div>
diff --git a/frontend/src/services/AudioHandler.ts b/frontend/src/services/AudioHandler.ts
index a1105be..fb1ae02 100644
--- a/frontend/src/services/AudioHandler.ts
+++ b/frontend/src/services/AudioHandler.ts
@@ -293,7 +293,9 @@ export class AudioHandler {
   mute(): void {
     if (!this.isMuted) {
       this.isMuted = true;
-      console.log('[AudioHandler] Muted — suppressing audio chunks during TTS playback');
+      console.log(
+        '[AudioHandler] Muted — suppressing audio chunks during TTS playback'
+      );
     }
   }
 

From 3b0448a7c5131ebcf5faae17184b34074b02d559 Mon Sep 17 00:00:00 2001
From: Cale Shapera <25466659+cshape@users.noreply.github.com>
Date: Mon, 23 Feb 2026 17:03:16 -0800
Subject: [PATCH 16/16] fix: cleaned up audio player path

---
 frontend/src/services/AudioPlayer.ts | 75 +++-------------------------
 1 file changed, 7 insertions(+), 68 deletions(-)

diff --git a/frontend/src/services/AudioPlayer.ts b/frontend/src/services/AudioPlayer.ts
index 0910757..3dc89fd 100644
--- a/frontend/src/services/AudioPlayer.ts
+++ b/frontend/src/services/AudioPlayer.ts
@@ -6,8 +6,6 @@ export class AudioPlayer {
   private audioContext: AudioContext | null = null;
   private audioQueue: AudioBuffer[] = [];
   private isPlaying = false;
-  private isStartingPlayback = false;
-  private currentSource: AudioBufferSourceNode | null = null;
   private listeners = new Map<string, EventCallback[]>();
   private streamTimeout: ReturnType<typeof setTimeout> | null = null;
   private isIOS: boolean;
@@ -15,8 +13,7 @@ export class AudioPlayer {
   private nextStartTime: number = 0;
   private scheduledSources: AudioBufferSourceNode[] = [];
   private scheduleInterval: ReturnType<typeof setInterval> | null = null;
-  private readonly SCHEDULE_AHEAD_TIME = 0.3; // Schedule 300ms ahead for mobile timer resilience
-  private readonly FADE_SAMPLES = 256; // ~11ms at 22050Hz TTS rate
+  private readonly SCHEDULE_AHEAD_TIME = 0.3;
 
   constructor() {
     this.isIOS =
@@ -115,24 +112,17 @@ export class AudioPlayer {
         bytes[i] = binaryString.charCodeAt(i);
       }
 
-      // Create audio buffer
       const audioBuffer = await this.createAudioBuffer(
         bytes.buffer,
         sampleRate,
         audioFormat
       );
-      this.applyFadeEnvelope(audioBuffer);
 
       this.audioQueue.push(audioBuffer);
 
-      // Start playback immediately if not already playing
-      if (!this.isPlaying && !this.isStartingPlayback) {
-        this.isStartingPlayback = true;
+      if (!this.isPlaying) {
         this.startScheduleInterval();
-        requestAnimationFrame(() => {
-          this.isStartingPlayback = false;
-          this.scheduleBuffers();
-        });
+        this.scheduleBuffers();
       }
     } catch (error) {
       console.error('Error processing audio stream:', error);
@@ -150,20 +140,9 @@ export class AudioPlayer {
 
     let numSamples: number;
 
-    console.log(
-      `[AudioPlayer] createAudioBuffer: format=${audioFormat}, byteLength=${arrayBuffer.byteLength}, sampleRate=${sampleRate}`
-    );
-
     if (audioFormat === 'float32') {
       const float32Array = new Float32Array(arrayBuffer);
       numSamples = float32Array.length;
-      console.log(
-        `[AudioPlayer] Float32 samples: ${numSamples}, first 3 values: [${Array.from(
-          float32Array.slice(0, 3)
-        )
-          .map((v) => v.toFixed(4))
-          .join(', ')}]`
-      );
 
       const audioBuffer = this.audioContext.createBuffer(
         1,
@@ -181,7 +160,6 @@ export class AudioPlayer {
       // Int16 PCM format
       const int16Array = new Int16Array(arrayBuffer);
       numSamples = int16Array.length;
-      console.log(`[AudioPlayer] Int16 samples: ${numSamples}`);
 
       const audioBuffer = this.audioContext.createBuffer(
         1,
@@ -198,24 +176,6 @@ export class AudioPlayer {
     }
   }
 
-  private applyFadeEnvelope(audioBuffer: AudioBuffer): void {
-    const channelData = audioBuffer.getChannelData(0);
-    const length = channelData.length;
-    const fadeLength = Math.min(this.FADE_SAMPLES, Math.floor(length / 4));
-
-    // Fade-in at start
-    for (let i = 0; i < fadeLength; i++) {
-      const gain = i / fadeLength;
-      channelData[i] *= gain;
-    }
-
-    // Fade-out at end
-    for (let i = 0; i < fadeLength; i++) {
-      const gain = i / fadeLength;
-      channelData[length - 1 - i] *= gain;
-    }
-  }
-
   private scheduleBuffers(): void {
     if (!this.audioContext || this.audioQueue.length === 0) {
       return;
@@ -223,16 +183,8 @@ export class AudioPlayer {
 
     const currentTime = this.audioContext.currentTime;
 
-    // Handle queue underrun with safety margin
     if (this.nextStartTime < currentTime) {
-      const underrunAmount = currentTime - this.nextStartTime;
-      if (underrunAmount > 0.05) {
-        console.warn(
-          `[AudioPlayer] Queue underrun: ${(underrunAmount * 1000).toFixed(1)}ms behind`
-        );
-      }
-      // Add small margin to ensure we're not scheduling in the past
-      this.nextStartTime = currentTime + 0.005;
+      this.nextStartTime = currentTime;
     }
 
     // Schedule buffers that should start within SCHEDULE_AHEAD_TIME
@@ -261,6 +213,9 @@ export class AudioPlayer {
         this.scheduledSources.splice(index, 1);
       }
 
+      // Backstop: ensure more buffers get scheduled even if setInterval is delayed
+      this.scheduleBuffers();
+
       if (this.scheduledSources.length === 0 && this.audioQueue.length === 0) {
         this.isPlaying = false;
         this.stopScheduleInterval();
@@ -270,9 +225,6 @@ export class AudioPlayer {
 
     try {
       source.start(startTime);
-      console.log(
-        `Scheduled buffer: ${audioBuffer.duration.toFixed(3)}s at ${startTime.toFixed(3)}`
-      );
 
       if (!this.isPlaying) {
         this.isPlaying = true;
@@ -312,7 +264,6 @@ export class AudioPlayer {
     if (this.isIOS && this.iosHandler) {
       this.iosHandler.stopAudioPlayback?.();
       this.isPlaying = false;
-      this.isStartingPlayback = false;
       this.emit('playback_stopped');
       return;
     }
@@ -332,20 +283,8 @@ export class AudioPlayer {
     this.scheduledSources = [];
     this.nextStartTime = 0;
 
-    if (this.currentSource) {
-      try {
-        this.currentSource.stop();
-        this.currentSource.disconnect();
-        this.currentSource = null;
-      } catch (error) {
-        console.warn('Error stopping audio source:', error);
-      }
-    }
-
-    // Clear audio queue to prevent any queued audio from playing
     this.audioQueue = [];
     this.isPlaying = false;
-    this.isStartingPlayback = false;
     this.emit('playback_stopped');
   }