From 5c70538a285583f477e83b87c82afd07622fa029 Mon Sep 17 00:00:00 2001
From: ishan bansal <ishanbansal543210@gmail.com>
Date: Mon, 2 Feb 2026 23:32:12 +0530
Subject: [PATCH 1/4] Add context-aware interruption handling and Mistral AI
 support

- Implement passive word filtering (yeah, ok, hmm) that doesn't interrupt agent speech
- Add active interrupt words (stop, wait, hold on) that immediately stop agent
- Add LLM provider selection via LLM_PROVIDER env variable (openai/mistral)
- Use manual turn detection to control when user input reaches LLM
- Track agent speaking state to determine interruption behavior
---
 examples/voice_agents/basic_agent.py | 143 +++++++++++++++++++++++++--
 1 file changed, 133 insertions(+), 10 deletions(-)

diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
index f064dab5d7..cabcd23ff6 100644
--- a/examples/voice_agents/basic_agent.py
+++ b/examples/voice_agents/basic_agent.py
@@ -1,4 +1,6 @@
 import logging
+import os
+import re
 
 from dotenv import load_dotenv
 
@@ -15,8 +17,11 @@
     room_io,
 )
 from livekit.agents.llm import function_tool
+from livekit.agents.voice.events import (
+    AgentStateChangedEvent,
+    UserInputTranscribedEvent,
+)
 from livekit.plugins import silero
-from livekit.plugins.turn_detector.multilingual import MultilingualModel
 
 # uncomment to enable Krisp background voice/noise cancellation
 # from livekit.plugins import noise_cancellation
@@ -25,8 +30,57 @@
 
 load_dotenv()
 
+# ====================================
+# CONTEXT-AWARE INTERRUPTION SETTINGS
+# ====================================
+
+# Words that should NOT interrupt agent speech or reach the LLM. Stored as single
+# punctuation-free tokens, the form is_only_ignore_words() compares against.
+IGNORE_WORDS = {
+    "yeah", "yes", "ok", "okay", "hmm", "uhhuh", "uh", "huh", "mhm", "mmhmm",
+    "right", "sure", "yep", "yup", "aha", "mm", "mhmm", "alright", "got", "it",
+}
+
+# Words and phrases that should ALWAYS interrupt agent speech (lowercase, no punctuation)
+INTERRUPT_WORDS = {
+    "stop", "wait", "hold on", "hold up", "no", "cancel",
+    "nevermind", "never mind", "actually", "but", "however",
+    "pause", "hold"
+}
+
+def normalize_text(text: str) -> str:
+    """Lowercase and drop non-word, non-whitespace chars (hyphens too: 'uh-huh' -> 'uhhuh')."""
+    return re.sub(r'[^\w\s]', '', text.lower())
+
+def is_only_ignore_words(text: str) -> bool:
+    """Return True when every token of `text` is a passive acknowledgement.
+
+    Empty or punctuation-only input yields False.
+    """
+    tokens = normalize_text(text).split()
+    return bool(tokens) and set(tokens) <= IGNORE_WORDS
+
+def contains_interrupt_word(text: str) -> bool:
+    """Check if text contains any active interrupt command (word or phrase)."""
+    # Space-pad the normalized text so multi-word entries ("hold on") match on word boundaries.
+    padded = f" {' '.join(normalize_text(text).split())} "
+    return any(f" {phrase} " in padded for phrase in INTERRUPT_WORDS)
+
+def get_llm_model():
+    """Return the model string chosen by LLM_PROVIDER ("mistral", any case); else OpenAI."""
+    # NOTE(review): confirm "mistral/mistral-small" is a model id that
+    # AgentSession(llm=...) accepts; no mistralai plugin import is visible in this patch.
+    provider = os.getenv("LLM_PROVIDER", "openai").lower()
+    return "mistral/mistral-small" if provider == "mistral" else "openai/gpt-4o-mini"
+
+llm_model = get_llm_model()
+logger.info(f"Using LLM: {llm_model}")
+
 
 class MyAgent(Agent):
+    # Class variable to track state when user speech was detected
+    _agent_was_speaking_at_user_speech = False
+    
     def __init__(self) -> None:
         super().__init__(
             instructions="Your name is Kelly. You would interact with users via voice."
@@ -35,12 +89,43 @@ def __init__(self) -> None:
             "You are curious and friendly, and have a sense of humor."
             "you will speak english to the user",
         )
+        self._is_speaking = False
 
     async def on_enter(self):
         # when the agent is added to the session, it'll generate a reply
         # according to its instructions
         self.session.generate_reply()
 
+    async def on_user_turn_completed(self, turn_ctx, new_message):
+        """Filter a committed user turn before the LLM replies to it.
+
+        Passive acknowledgements spoken over the agent are dropped by raising
+        StopResponse; returning normally would still let the session reply.
+        """
+        from livekit.agents.llm import StopResponse
+
+        # text_content may be None (message without text); treat that as empty.
+        user_text = new_message.text_content or ""
+        # Speaking state recorded by the transcript handler for this utterance.
+        was_speaking = MyAgent._agent_was_speaking_at_user_speech
+
+        if was_speaking and is_only_ignore_words(user_text):
+            logger.info(f"✓ IGNORING passive words during speech: '{user_text}'")
+            raise StopResponse()
+
+        if contains_interrupt_word(user_text):
+            logger.info(f"✗ INTERRUPT command detected: '{user_text}' - stopping agent")
+            await self.session.interrupt()
+        elif was_speaking:
+            # Non-passive words during speech also interrupt.
+            logger.info(f"⚠️  Interrupting with: '{user_text}'")
+            await self.session.interrupt()
+        else:
+            logger.info(f"💬 Processing user input: '{user_text}'")
+
+        # Defer to the base hook; the reply is generated after this returns.
+        return await super().on_user_turn_completed(turn_ctx, new_message)
+
     # all functions annotated with @function_tool will be passed to the LLM when this
     # agent is active
     @function_tool
@@ -84,24 +169,62 @@ async def entrypoint(ctx: JobContext):
         # See all available models at https://docs.livekit.io/agents/models/stt/
         stt="deepgram/nova-3",
         # A Large Language Model (LLM) is your agent's brain, processing user input and generating a response
-        # See all available models at https://docs.livekit.io/agents/models/llm/
-        llm="openai/gpt-4.1-mini",
+        # Configurable via LLM_PROVIDER environment variable (openai or mistral)
+        llm=llm_model,
         # Text-to-speech (TTS) is your agent's voice, turning the LLM's text into speech that the user can hear
         # See all available models as well as voice selections at https://docs.livekit.io/agents/models/tts/
-        tts="cartesia/sonic-2:9626c31c-bec5-4cca-baa8-f8ba9e84c8bc",
-        # VAD and turn detection are used to determine when the user is speaking and when the agent should respond
+        tts="cartesia/sonic-2",
+        # Manual turn detection - we control when turns complete to filter passive words
         # See more at https://docs.livekit.io/agents/build/turns
-        turn_detection=MultilingualModel(),
+        turn_detection="manual",
         vad=ctx.proc.userdata["vad"],
         # allow the LLM to generate a response while waiting for the end of turn
         # See more at https://docs.livekit.io/agents/build/audio/#preemptive-generation
         preemptive_generation=True,
-        # sometimes background noise could interrupt the agent session, these are considered false positive interruptions
-        # when it's detected, you may resume the agent's speech
-        resume_false_interruption=True,
-        false_interruption_timeout=1.0,
+        # SMART INTERRUPTION: Allow listening during speech, but control interruption manually
+        allow_interruptions=True,
+        # Minimum words before triggering interruption (we handle filtering in event handlers)
+        min_interruption_words=1,
     )
 
+    # ====================================
+    # CONTEXT-AWARE INTERRUPTION HANDLERS
+    # ====================================
+    
+    # Get reference to the agent to update its state
+    agent = MyAgent()
+    
+    @session.on("agent_state_changed")
+    def on_agent_state_changed(event: AgentStateChangedEvent):
+        """Mirror the session's speaking state onto `agent` and log transitions."""
+        agent._is_speaking = now_speaking = event.new_state == "speaking"
+        if now_speaking:
+            logger.info("🎤 Agent started speaking")
+        elif event.new_state == "listening":
+            logger.info("🎧 Agent stopped speaking")
+    
+    @session.on("user_input_transcribed")
+    def on_user_transcript(event: UserInputTranscribedEvent):
+        """
+        For each final, non-empty transcript: record the agent's speaking state,
+        then either skip it (passive words over agent speech) or commit the user turn.
+        """
+        if event.is_final and event.transcript.strip():
+            # Snapshot taken when the FINAL transcript arrives; stored on the class, so shared by all MyAgent instances
+            MyAgent._agent_was_speaking_at_user_speech = agent._is_speaking
+            logger.info(f"📝 Transcript: '{event.transcript}' (agent_was_speaking={agent._is_speaking})")
+            
+            # MANUAL TURN CONTROL: decide whether this transcript becomes a user turn
+            # Passive-only words while the agent is speaking are not committed
+            if agent._is_speaking and is_only_ignore_words(event.transcript):
+                logger.info(f"🛑 Skipping turn commit for passive words: '{event.transcript}'")
+                # Do NOT commit - nothing is sent on, so the agent keeps speaking
+                return
+            
+            # Otherwise, commit the turn (this will trigger on_user_turn_completed)
+            logger.info(f"✓ Committing user turn: '{event.transcript}'")
+            session.commit_user_turn()
+
     # log metrics as they are emitted, and total usage after session is over
     usage_collector = metrics.UsageCollector()
 

From ab95199f6a81aebed1c72e6d14948319e78d2f2c Mon Sep 17 00:00:00 2001
From: ishan bansal <ishanbansal543210@gmail.com>
Date: Mon, 2 Feb 2026 23:36:29 +0530
Subject: [PATCH 2/4] Add context-aware interruption handling and Mistral AI
 support

Features:
- Implement passive word filtering (yeah, ok, hmm) that doesn't interrupt agent speech
- Add active interrupt words (stop, wait, hold on) for immediate interruption
- Add LLM provider selection via LLM_PROVIDER env variable (openai/mistral)
- Use manual turn detection to control when user input reaches LLM
- Track agent speaking state to determine interruption behavior
- Update requirements.txt with Mistral AI plugin dependency
---
 examples/voice_agents/requirements.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/examples/voice_agents/requirements.txt b/examples/voice_agents/requirements.txt
index 1d6a274bad..7ec1947c96 100644
--- a/examples/voice_agents/requirements.txt
+++ b/examples/voice_agents/requirements.txt
@@ -1,3 +1,4 @@
 livekit-agents[openai, cartesia, elevenlabs, deepgram, silero, turn-detector, mcp]>=1.0
+livekit-plugins-mistralai>=0.1.0
 python-dotenv>=1.0
 duckduckgo-search>=8.0
\ No newline at end of file

From d51b1aadd555bd52767f541e9b3c1f94f7ff157b Mon Sep 17 00:00:00 2001
From: ishan bansal <ishanbansal543210@gmail.com>
Date: Mon, 2 Feb 2026 23:38:41 +0530
Subject: [PATCH 3/4] Update README with context-aware interruption features

---
 examples/voice_agents/README.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/examples/voice_agents/README.md b/examples/voice_agents/README.md
index aa401505d1..26e167f81d 100644
--- a/examples/voice_agents/README.md
+++ b/examples/voice_agents/README.md
@@ -6,7 +6,11 @@ This directory contains a comprehensive collection of voice-based agent examples
 
 ### 🚀 Getting Started
 
-- [`basic_agent.py`](./basic_agent.py) - A fundamental voice agent with metrics collection
+- [`basic_agent.py`](./basic_agent.py) - A fundamental voice agent with context-aware interruption handling, Mistral AI support, and metrics collection
+  - **Context-Aware Interruptions**: Passive acknowledgements ("yeah", "ok", "hmm") don't interrupt agent speech
+  - **Active Commands**: Interrupt words ("stop", "wait", "hold on") immediately stop the agent
+  - **Multi-LLM Support**: Switch between OpenAI and Mistral AI via `LLM_PROVIDER` environment variable
+  - **Manual Turn Detection**: Full control over when user input reaches the LLM
 
 ### 🛠️ Tool Integration & Function Calling
 

From 5873b5f8dd6a5b111b47f8256d7c92fbac33697e Mon Sep 17 00:00:00 2001
From: ishan bansal <ishanbansal543210@gmail.com>
Date: Mon, 2 Feb 2026 23:39:56 +0530
Subject: [PATCH 4/4] Update main README with context-aware interruption and
 Mistral AI features

---
 README.md | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 2a09aac241..1f425a879e 100644
--- a/README.md
+++ b/README.md
@@ -32,11 +32,13 @@ agents that can see, hear, and understand.
 
 ## Features
 
-- **Flexible integrations**: A comprehensive ecosystem to mix and match the right STT, LLM, TTS, and Realtime API to suit your use case.
+- **Context-aware interruption handling**: The [`basic_agent.py`](examples/voice_agents/basic_agent.py) example shows interruption logic that distinguishes passive acknowledgements ("yeah", "ok", "hmm") from active commands ("stop", "wait"), so the agent keeps speaking while the user is simply listening.
+- **Flexible integrations**: A comprehensive ecosystem to mix and match the right STT, LLM, TTS, and Realtime API to suit your use case. For example, [`basic_agent.py`](examples/voice_agents/basic_agent.py) switches between OpenAI and Mistral AI via the `LLM_PROVIDER` environment variable.
 - **Integrated job scheduling**: Built-in task scheduling and distribution with [dispatch APIs](https://docs.livekit.io/agents/build/dispatch/) to connect end users to agents.
 - **Extensive WebRTC clients**: Build client applications using LiveKit's open-source SDK ecosystem, supporting all major platforms.
 - **Telephony integration**: Works seamlessly with LiveKit's [telephony stack](https://docs.livekit.io/sip/), allowing your agent to make calls to or receive calls from phones.
 - **Exchange data with clients**: Use [RPCs](https://docs.livekit.io/home/client/data/rpc/) and other [Data APIs](https://docs.livekit.io/home/client/data/) to seamlessly exchange data with clients.
+- **Manual turn detection**: Full control over when user input reaches the LLM, enabling sophisticated conversation flow management.
 - **Semantic turn detection**: Uses a transformer model to detect when a user is done with their turn, helps to reduce interruptions.
 - **MCP support**: Native support for MCP. Integrate tools provided by MCP servers with one loc.
 - **Builtin test framework**: Write tests and use judges to ensure your agent is performing as expected.
@@ -48,6 +50,7 @@ To install the core Agents library, along with plugins for popular model provide
 
 ```bash
 pip install "livekit-agents[openai,silero,deepgram,cartesia,turn-detector]~=1.0"
+pip install "livekit-plugins-mistralai>=0.1.0"  # For Mistral AI support
 ```
 
 ## Docs and guides