From 9774dce0f201fbfdeaad88c00dbb2702b7aef345 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:15:38 +0000
Subject: [PATCH 1/9] Add voice chat with speech-to-text and streamed model
 responses

- Add VoiceChat screen with microphone recording, transcription of each
  finished recording via OpenAI Whisper, and streaming AI model responses
- Add server-side /chat/transcribe endpoint for audio transcription
- Add text-to-speech for reading AI responses aloud (expo-speech)
- Support all chat providers (Claude, GPT, Gemini)
- Add Voice tab to bottom navigation with mic icon
- Install expo-av for audio recording and expo-speech for TTS

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/package.json              |   2 +
 app/pnpm-lock.yaml            |  38 ++-
 app/src/main.tsx              |  16 +-
 app/src/screens/index.ts      |   1 +
 app/src/screens/voice.tsx     | 492 ++++++++++++++++++++++++++++++++++
 server/src/chat/chatRouter.ts |   4 +
 server/src/chat/transcribe.ts |  39 +++
 7 files changed, 587 insertions(+), 5 deletions(-)
 create mode 100644 app/src/screens/voice.tsx
 create mode 100644 server/src/chat/transcribe.ts

diff --git a/app/package.json b/app/package.json
index 9fd65c86..87189261 100644
--- a/app/package.json
+++ b/app/package.json
@@ -19,11 +19,13 @@
     "@react-navigation/native-stack": "^7.3.10",
     "@ronradtke/react-native-markdown-display": "^8.0.0",
     "expo": "^54.0.0",
+    "expo-av": "^16.0.8",
     "expo-clipboard": "~8.0.8",
     "expo-document-picker": "~14.0.8",
     "expo-file-system": "~19.0.21",
     "expo-font": "~14.0.11",
     "expo-image-picker": "~17.0.10",
+    "expo-speech": "^55.0.13",
     "expo-splash-screen": "~31.0.13",
     "expo-status-bar": "~3.0.9",
     "punycode": "^2.3.1",
diff --git a/app/pnpm-lock.yaml b/app/pnpm-lock.yaml
index 44516e7e..cc4fea4b 100644
--- a/app/pnpm-lock.yaml
+++ b/app/pnpm-lock.yaml
@@ -38,6 +38,9 @@ importers:
       expo:
         specifier: ^54.0.0
         version: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0)
+      expo-av:
+        specifier: ^16.0.8
+        version: 16.0.8(expo@54.0.32)(react-native-web@0.20.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0)
       expo-clipboard:
         specifier: ~8.0.8
         version: 8.0.8(expo@54.0.32)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0)
@@ -53,6 +56,9 @@ importers:
       expo-image-picker:
         specifier: ~17.0.10
         version: 17.0.10(expo@54.0.32)
+      expo-speech:
+        specifier: ^55.0.13
+        version: 55.0.13(expo@54.0.32)
       expo-splash-screen:
         specifier: ~31.0.13
         version: 31.0.13(expo@54.0.32)
@@ -1557,6 +1563,17 @@ packages:
       react: '*'
       react-native: '*'
 
+  expo-av@16.0.8:
+    resolution: {integrity: sha512-cmVPftGR/ca7XBgs7R6ky36lF3OC0/MM/lpgX/yXqfv0jASTsh7AYX9JxHCwFmF+Z6JEB1vne9FDx4GiLcGreQ==}
+    peerDependencies:
+      expo: '*'
+      react: '*'
+      react-native: '*'
+      react-native-web: '*'
+    peerDependenciesMeta:
+      react-native-web:
+        optional: true
+
   expo-clipboard@8.0.8:
     resolution: {integrity: sha512-VKoBkHIpZZDJTB0jRO4/PZskHdMNOEz3P/41tmM6fDuODMpqhvyWK053X0ebspkxiawJX9lX33JXHBCvVsTTOA==}
     peerDependencies:
@@ -1618,6 +1635,11 @@ packages:
     resolution: {integrity: sha512-IGR++flYH70rhLyeXF0Phle56/k4cee87WeQ4mamS+MkVAVP+dDlOHf2nN06Z9Y2KhU0Gp1k+y61KkghF7HdhA==}
     engines: {node: '>=20.16.0'}
 
+  expo-speech@55.0.13:
+    resolution: {integrity: sha512-DZzwe9Z6NhHmNi5ICwYE9BgcNmE3U8t0pSlg1vs18f6omyD88e1ZqCJ0as1aWgZYO3WtbcwWqCTboV8QMwtOow==}
+    peerDependencies:
+      expo: '*'
+
   expo-splash-screen@31.0.13:
     resolution: {integrity: sha512-1epJLC1cDlwwj089R2h8cxaU5uk4ONVAC+vzGiTZH4YARQhL4Stlz1MbR6yAS173GMosvkE6CAeihR7oIbCkDA==}
     peerDependencies:
@@ -1974,28 +1996,24 @@ packages:
     engines: {node: '>= 12.0.0'}
     cpu: [arm64]
     os: [linux]
-    libc: [glibc]
 
   lightningcss-linux-arm64-musl@1.31.1:
     resolution: {integrity: sha512-mVZ7Pg2zIbe3XlNbZJdjs86YViQFoJSpc41CbVmKBPiGmC4YrfeOyz65ms2qpAobVd7WQsbW4PdsSJEMymyIMg==}
     engines: {node: '>= 12.0.0'}
     cpu: [arm64]
     os: [linux]
-    libc: [musl]
 
   lightningcss-linux-x64-gnu@1.31.1:
     resolution: {integrity: sha512-xGlFWRMl+0KvUhgySdIaReQdB4FNudfUTARn7q0hh/V67PVGCs3ADFjw+6++kG1RNd0zdGRlEKa+T13/tQjPMA==}
     engines: {node: '>= 12.0.0'}
     cpu: [x64]
     os: [linux]
-    libc: [glibc]
 
   lightningcss-linux-x64-musl@1.31.1:
     resolution: {integrity: sha512-eowF8PrKHw9LpoZii5tdZwnBcYDxRw2rRCyvAXLi34iyeYfqCQNA9rmUM0ce62NlPhCvof1+9ivRaTY6pSKDaA==}
     engines: {node: '>= 12.0.0'}
     cpu: [x64]
     os: [linux]
-    libc: [musl]
 
   lightningcss-win32-arm64-msvc@1.31.1:
     resolution: {integrity: sha512-aJReEbSEQzx1uBlQizAOBSjcmr9dCdL3XuC/6HLXAxmtErsj2ICo5yYggg1qOODQMtnjNQv2UHb9NpOuFtYe4w==}
@@ -4903,6 +4921,14 @@ snapshots:
     transitivePeerDependencies:
       - supports-color
 
+  expo-av@16.0.8(expo@54.0.32)(react-native-web@0.20.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0):
+    dependencies:
+      expo: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0)
+      react: 19.1.0
+      react-native: 0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0)
+    optionalDependencies:
+      react-native-web: 0.20.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0)
+
   expo-clipboard@8.0.8(expo@54.0.32)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0):
     dependencies:
       expo: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0)
@@ -4964,6 +4990,10 @@ snapshots:
 
   expo-server@1.0.5: {}
 
+  expo-speech@55.0.13(expo@54.0.32):
+    dependencies:
+      expo: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0)
+
   expo-splash-screen@31.0.13(expo@54.0.32):
     dependencies:
       '@expo/prebuild-config': 54.0.8(expo@54.0.32)
diff --git a/app/src/main.tsx b/app/src/main.tsx
index 78f382d3..685de076 100644
--- a/app/src/main.tsx
+++ b/app/src/main.tsx
@@ -1,7 +1,7 @@
 import { useContext, useRef, useCallback } from 'react';
 import { StyleSheet, Text, View, Button } from 'react-native';
 import { createBottomTabNavigator } from '@react-navigation/bottom-tabs';
-import { Chat, Images, Settings } from './screens'
+import { Chat, Images, VoiceChat, Settings } from './screens'
 import { Header } from './components'
 import FeatherIcon from '@expo/vector-icons/Feather'
 import {
@@ -43,6 +43,20 @@ function MainComponent() {
             ),
           }}
         />
+        <Tab.Screen
+          name="Voice"
+          component={VoiceChat}
+          options={{
+            header: () => <Header />,
+            tabBarIcon: ({ color, size }) => (
+              <FeatherIcon
+                name="mic"
+                color={color}
+                size={size}
+              />
+            ),
+          }}
+        />
         <Tab.Screen
           name="Images"
           component={Images}
diff --git a/app/src/screens/index.ts b/app/src/screens/index.ts
index 73ce53c7..004c6730 100644
--- a/app/src/screens/index.ts
+++ b/app/src/screens/index.ts
@@ -1,3 +1,4 @@
 export { Chat } from './chat'
 export { Images } from './images'
+export { VoiceChat } from './voice'
 export { Settings } from './settings'
\ No newline at end of file
diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
new file mode 100644
index 00000000..90c15c09
--- /dev/null
+++ b/app/src/screens/voice.tsx
@@ -0,0 +1,492 @@
+import {
+  View,
+  Text,
+  StyleSheet,
+  TouchableOpacity,
+  ScrollView,
+  ActivityIndicator,
+  Animated,
+  Platform,
+} from 'react-native'
+import { useContext, useState, useRef, useEffect } from 'react'
+import { ThemeContext, AppContext } from '../context'
+import { getEventSource, getFirstNCharsOrLess, getChatType } from '../utils'
+import { DOMAIN } from '../../constants'
+import { Audio } from 'expo-av'
+import * as Speech from 'expo-speech'
+import Ionicons from '@expo/vector-icons/Ionicons'
+import Markdown from '@ronradtke/react-native-markdown-display'
+
+type VoiceMessage = {
+  user: string
+  assistant?: string
+}
+
+type VoiceState = 'idle' | 'recording' | 'transcribing' | 'responding'
+
+export function VoiceChat() {
+  const [messages, setMessages] = useState<VoiceMessage[]>([])
+  const [voiceState, setVoiceState] = useState<VoiceState>('idle')
+  const [currentTranscription, setCurrentTranscription] = useState('')
+  const [isSpeaking, setIsSpeaking] = useState(false)
+  const [apiMessages, setApiMessages] = useState('')
+  const recordingRef = useRef<Audio.Recording | null>(null)
+  const scrollViewRef = useRef<ScrollView | null>(null)
+  const pulseAnim = useRef(new Animated.Value(1)).current
+
+  const { theme } = useContext(ThemeContext)
+  const { chatType } = useContext(AppContext)
+  const styles = getStyles(theme)
+
+  useEffect(() => {
+    if (voiceState === 'recording') {
+      const pulse = Animated.loop(
+        Animated.sequence([
+          Animated.timing(pulseAnim, {
+            toValue: 1.2,
+            duration: 800,
+            useNativeDriver: true,
+          }),
+          Animated.timing(pulseAnim, {
+            toValue: 1,
+            duration: 800,
+            useNativeDriver: true,
+          }),
+        ])
+      )
+      pulse.start()
+      return () => pulse.stop()
+    } else {
+      pulseAnim.setValue(1)
+    }
+  }, [voiceState])
+
+  async function startRecording() {
+    try {
+      const { granted } = await Audio.requestPermissionsAsync()
+      if (!granted) return
+
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: true,
+        playsInSilentModeIOS: true,
+      })
+
+      const { recording } = await Audio.Recording.createAsync(
+        Audio.RecordingOptionsPresets.HIGH_QUALITY
+      )
+      recordingRef.current = recording
+      setVoiceState('recording')
+    } catch (err) {
+      console.log('Failed to start recording:', err)
+    }
+  }
+
+  async function stopRecording() {
+    if (!recordingRef.current) return
+
+    setVoiceState('transcribing')
+    try {
+      await recordingRef.current.stopAndUnloadAsync()
+      await Audio.setAudioModeAsync({
+        allowsRecordingIOS: false,
+      })
+
+      const uri = recordingRef.current.getURI()
+      recordingRef.current = null
+
+      if (!uri) {
+        setVoiceState('idle')
+        return
+      }
+
+      const formData = new FormData()
+      formData.append('audio', {
+        uri,
+        type: 'audio/m4a',
+        name: 'recording.m4a',
+      } as any)
+
+      const response = await fetch(`${DOMAIN}/chat/transcribe`, {
+        method: 'POST',
+        body: formData,
+        headers: {
+          'Content-Type': 'multipart/form-data',
+        },
+      })
+
+      const data = await response.json()
+
+      if (data.text && data.text.trim()) {
+        setCurrentTranscription(data.text)
+        sendToModel(data.text)
+      } else {
+        setVoiceState('idle')
+      }
+    } catch (err) {
+      console.log('Failed to transcribe:', err)
+      setVoiceState('idle')
+    }
+  }
+
+  async function sendToModel(text: string) {
+    setVoiceState('responding')
+    let localResponse = ''
+
+    const newMessages: VoiceMessage[] = [...messages, { user: text }]
+    setMessages([...newMessages])
+    setCurrentTranscription('')
+
+    setTimeout(() => {
+      scrollViewRef.current?.scrollToEnd({ animated: true })
+    }, 100)
+
+    const chatTypeStr = getChatType(chatType)
+
+    let body: any
+    if (chatTypeStr === 'claude') {
+      const claudePrompt = `${apiMessages}\n\nHuman: ${text}\n\nAssistant:`
+      body = { prompt: claudePrompt, model: chatType.label }
+    } else if (chatTypeStr === 'gemini') {
+      body = { prompt: text, model: chatType.label }
+    } else {
+      const gptMessages = newMessages.reduce((acc: any[], msg) => {
+        acc.push({ role: 'user', content: msg.user })
+        if (msg.assistant) {
+          acc.push({ role: 'assistant', content: msg.assistant })
+        }
+        return acc
+      }, [])
+      body = { messages: gptMessages, model: chatType.label }
+    }
+
+    const es = await getEventSource({ body, type: chatTypeStr })
+
+    const listener = (event: any) => {
+      if (event.type === 'open') {
+        // connection opened
+      } else if (event.type === 'message') {
+        if (event.data !== '[DONE]') {
+          if (localResponse.length < 850) {
+            scrollViewRef.current?.scrollToEnd({ animated: true })
+          }
+          const data = JSON.parse(event.data)
+          if (chatTypeStr === 'claude') {
+            if (data.text) localResponse += data.text
+          } else if (chatTypeStr === 'gemini') {
+            localResponse += typeof data === 'string' ? data : ''
+          } else {
+            if (typeof data === 'string') {
+              localResponse += data
+            } else if (data?.content) {
+              localResponse += data.content
+            }
+          }
+          const updated = [...newMessages]
+          updated[updated.length - 1].assistant = localResponse
+          setMessages([...updated])
+        } else {
+          es.close()
+          setVoiceState('idle')
+          if (chatTypeStr === 'claude') {
+            setApiMessages(prev =>
+              `${prev}\n\nHuman: ${text}\n\nAssistant:${getFirstNCharsOrLess(localResponse, 2000)}`
+            )
+          }
+          if (localResponse) {
+            speakResponse(localResponse)
+          }
+        }
+      } else if (event.type === 'error' || event.type === 'exception') {
+        setVoiceState('idle')
+        es.close()
+      }
+    }
+
+    es.addEventListener('open', listener)
+    es.addEventListener('message', listener)
+    es.addEventListener('error', listener)
+  }
+
+  function speakResponse(text: string) {
+    const cleanText = text
+      .replace(/```[\s\S]*?```/g, ' code block ')
+      .replace(/`[^`]*`/g, '')
+      .replace(/[#*_~\[\]]/g, '')
+      .replace(/\n+/g, '. ')
+      .trim()
+
+    if (!cleanText) return
+
+    setIsSpeaking(true)
+    Speech.speak(cleanText, {
+      onDone: () => setIsSpeaking(false),
+      onStopped: () => setIsSpeaking(false),
+      rate: 1.0,
+    })
+  }
+
+  function stopSpeaking() {
+    Speech.stop()
+    setIsSpeaking(false)
+  }
+
+  function handleMicPress() {
+    if (voiceState === 'idle') {
+      if (isSpeaking) stopSpeaking()
+      startRecording()
+    } else if (voiceState === 'recording') {
+      stopRecording()
+    }
+  }
+
+  function clearConversation() {
+    setMessages([])
+    setCurrentTranscription('')
+    setApiMessages('')
+    if (isSpeaking) stopSpeaking()
+  }
+
+  const getStatusText = () => {
+    switch (voiceState) {
+      case 'recording': return 'Listening...'
+      case 'transcribing': return 'Transcribing...'
+      case 'responding': return 'Thinking...'
+      default: return isSpeaking ? 'Speaking...' : 'Tap to speak'
+    }
+  }
+
+  const getMicColor = () => {
+    if (voiceState === 'recording') return '#ff4444'
+    if (voiceState === 'transcribing' || voiceState === 'responding') return theme.mutedForegroundColor
+    return theme.tintColor
+  }
+
+  return (
+    <View style={styles.container}>
+      <ScrollView
+        ref={scrollViewRef}
+        style={styles.messagesContainer}
+        contentContainerStyle={styles.messagesContent}
+        onContentSizeChange={() => scrollViewRef.current?.scrollToEnd({ animated: true })}
+      >
+        {messages.length === 0 && voiceState === 'idle' && (
+          <View style={styles.emptyState}>
+            <Ionicons name="mic-outline" size={48} color={theme.mutedForegroundColor} />
+            <Text style={styles.emptyStateTitle}>Voice Chat</Text>
+            <Text style={styles.emptyStateSubtext}>
+              Tap the microphone to start speaking.{'\n'}Your speech will be transcribed and sent to {chatType.name}.
+            </Text>
+          </View>
+        )}
+        {messages.map((msg, i) => (
+          <View key={i}>
+            <View style={styles.userBubble}>
+              <Ionicons
+                name="mic"
+                size={14}
+                color={theme.tintTextColor}
+                style={styles.micIcon}
+              />
+              <Text style={styles.userText}>{msg.user}</Text>
+            </View>
+            {msg.assistant ? (
+              <View style={styles.assistantBubble}>
+                <Markdown
+                  style={{
+                    body: {
+                      color: theme.textColor,
+                      fontFamily: theme.regularFont,
+                      fontSize: 15,
+                    },
+                    code_inline: {
+                      backgroundColor: theme.backgroundColor,
+                      color: theme.textColor,
+                      fontFamily: Platform.OS === 'ios' ? 'Menlo' : 'monospace',
+                      fontSize: 13,
+                    },
+                    fence: {
+                      backgroundColor: theme.backgroundColor,
+                      color: theme.textColor,
+                      fontFamily: Platform.OS === 'ios' ? 'Menlo' : 'monospace',
+                      fontSize: 13,
+                      padding: 10,
+                      borderRadius: 6,
+                    },
+                  }}
+                >
+                  {msg.assistant}
+                </Markdown>
+              </View>
+            ) : (
+              <View style={styles.assistantBubble}>
+                <ActivityIndicator size="small" color={theme.tintColor} />
+              </View>
+            )}
+          </View>
+        ))}
+        {currentTranscription ? (
+          <View style={styles.userBubble}>
+            <Ionicons
+              name="mic"
+              size={14}
+              color={theme.tintTextColor}
+              style={styles.micIcon}
+            />
+            <Text style={styles.userText}>{currentTranscription}</Text>
+          </View>
+        ) : null}
+      </ScrollView>
+
+      <View style={styles.controlsContainer}>
+        <Text style={styles.statusText}>{getStatusText()}</Text>
+        <View style={styles.buttonRow}>
+          {messages.length > 0 && voiceState === 'idle' && (
+            <TouchableOpacity
+              style={styles.secondaryButton}
+              onPress={clearConversation}
+            >
+              <Ionicons name="trash-outline" size={22} color={theme.textColor} />
+            </TouchableOpacity>
+          )}
+          <Animated.View style={{ transform: [{ scale: pulseAnim }] }}>
+            <TouchableOpacity
+              style={[
+                styles.micButton,
+                { backgroundColor: getMicColor() },
+                (voiceState === 'transcribing' || voiceState === 'responding') &&
+                  styles.micButtonDisabled,
+              ]}
+              onPress={handleMicPress}
+              disabled={voiceState === 'transcribing' || voiceState === 'responding'}
+              activeOpacity={0.7}
+            >
+              <Ionicons
+                name={voiceState === 'recording' ? 'stop' : 'mic'}
+                size={32}
+                color="#fff"
+              />
+            </TouchableOpacity>
+          </Animated.View>
+          {isSpeaking && voiceState === 'idle' && (
+            <TouchableOpacity
+              style={styles.secondaryButton}
+              onPress={stopSpeaking}
+            >
+              <Ionicons name="volume-mute" size={22} color={theme.textColor} />
+            </TouchableOpacity>
+          )}
+        </View>
+      </View>
+    </View>
+  )
+}
+
+function getStyles(theme: any) {
+  return StyleSheet.create({
+    container: {
+      flex: 1,
+      backgroundColor: theme.backgroundColor,
+    },
+    messagesContainer: {
+      flex: 1,
+    },
+    messagesContent: {
+      padding: 16,
+      paddingBottom: 8,
+    },
+    emptyState: {
+      alignItems: 'center',
+      justifyContent: 'center',
+      paddingTop: 80,
+      paddingHorizontal: 40,
+    },
+    emptyStateTitle: {
+      fontSize: 22,
+      fontFamily: theme.semiBoldFont,
+      color: theme.textColor,
+      marginTop: 16,
+    },
+    emptyStateSubtext: {
+      fontSize: 14,
+      fontFamily: theme.regularFont,
+      color: theme.mutedForegroundColor,
+      textAlign: 'center',
+      marginTop: 8,
+      lineHeight: 20,
+    },
+    userBubble: {
+      backgroundColor: theme.tintColor,
+      borderRadius: 16,
+      borderBottomRightRadius: 4,
+      padding: 12,
+      paddingHorizontal: 14,
+      marginBottom: 8,
+      alignSelf: 'flex-end',
+      maxWidth: '80%',
+      flexDirection: 'row',
+      alignItems: 'center',
+    },
+    micIcon: {
+      marginRight: 6,
+    },
+    userText: {
+      color: theme.tintTextColor,
+      fontSize: 15,
+      fontFamily: theme.regularFont,
+      flex: 1,
+    },
+    assistantBubble: {
+      backgroundColor: theme.borderColor,
+      borderRadius: 16,
+      borderBottomLeftRadius: 4,
+      padding: 12,
+      paddingHorizontal: 14,
+      marginBottom: 16,
+      alignSelf: 'flex-start',
+      maxWidth: '85%',
+    },
+    controlsContainer: {
+      alignItems: 'center',
+      paddingVertical: 20,
+      paddingBottom: 10,
+      borderTopWidth: 1,
+      borderTopColor: theme.borderColor,
+    },
+    statusText: {
+      fontSize: 14,
+      fontFamily: theme.mediumFont,
+      color: theme.mutedForegroundColor,
+      marginBottom: 16,
+    },
+    buttonRow: {
+      flexDirection: 'row',
+      alignItems: 'center',
+      justifyContent: 'center',
+      gap: 20,
+    },
+    micButton: {
+      width: 72,
+      height: 72,
+      borderRadius: 36,
+      justifyContent: 'center',
+      alignItems: 'center',
+      shadowColor: '#000',
+      shadowOffset: { width: 0, height: 2 },
+      shadowOpacity: 0.25,
+      shadowRadius: 4,
+      elevation: 5,
+    },
+    micButtonDisabled: {
+      opacity: 0.5,
+    },
+    secondaryButton: {
+      width: 44,
+      height: 44,
+      borderRadius: 22,
+      borderWidth: 1,
+      borderColor: theme.borderColor,
+      justifyContent: 'center',
+      alignItems: 'center',
+    },
+  })
+}
diff --git a/server/src/chat/chatRouter.ts b/server/src/chat/chatRouter.ts
index 6bb52b61..cfb291a0 100644
--- a/server/src/chat/chatRouter.ts
+++ b/server/src/chat/chatRouter.ts
@@ -1,12 +1,16 @@
 import express from 'express'
+import multer from 'multer'
 import { claude } from './claude'
 import { gpt } from './gpt'
 import { gemini } from './gemini'
+import { transcribe } from './transcribe'
 
 const router = express.Router()
+const upload = multer({ dest: 'uploads/' })
 
 router.post('/claude', claude)
 router.post('/gpt', gpt)
 router.post('/gemini', gemini)
+router.post('/transcribe', upload.single('audio'), transcribe)
 
 export default router
diff --git a/server/src/chat/transcribe.ts b/server/src/chat/transcribe.ts
new file mode 100644
index 00000000..f34538ab
--- /dev/null
+++ b/server/src/chat/transcribe.ts
@@ -0,0 +1,39 @@
+import { Request, Response } from "express"
+import asyncHandler from 'express-async-handler'
+import fs from 'fs'
+
+export const transcribe = asyncHandler(async (req: Request, res: Response) => {
+  try {
+    if (!req.file) {
+      res.status(400).json({ error: 'No audio file provided' })
+      return
+    }
+
+    const fileBuffer = fs.readFileSync(req.file.path)
+    const blob = new Blob([fileBuffer], { type: req.file.mimetype || 'audio/m4a' })
+    const formData = new FormData()
+    formData.append('file', blob, req.file.originalname || 'audio.m4a')
+    formData.append('model', 'whisper-1')
+
+    const response = await fetch('https://api.openai.com/v1/audio/transcriptions', {
+      method: 'POST',
+      headers: {
+        'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`
+      },
+      body: formData
+    })
+
+    const data = await response.json() as { text?: string }
+
+    try { fs.unlinkSync(req.file.path) } catch (e) {}
+
+    if (data.text) {
+      res.json({ text: data.text })
+    } else {
+      res.status(500).json({ error: 'No transcription returned' })
+    }
+  } catch (err) {
+    console.log('error in transcription: ', err)
+    res.status(500).json({ error: 'Transcription failed' })
+  }
+})

From cc8a7ee6bf40228ebca8f7f489f7cea8b45cb339 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:20:04 +0000
Subject: [PATCH 2/9] Clean up temp file in catch block to prevent leaking
 uploads on error

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 server/src/chat/transcribe.ts | 1 +
 1 file changed, 1 insertion(+)

diff --git a/server/src/chat/transcribe.ts b/server/src/chat/transcribe.ts
index f34538ab..4842effa 100644
--- a/server/src/chat/transcribe.ts
+++ b/server/src/chat/transcribe.ts
@@ -34,6 +34,7 @@ export const transcribe = asyncHandler(async (req: Request, res: Response) => {
     }
   } catch (err) {
     console.log('error in transcription: ', err)
+    if (req.file?.path) { try { fs.unlinkSync(req.file.path) } catch (e) {} }
     res.status(500).json({ error: 'Transcription failed' })
   }
 })

From 71909e5215ee456a501919941b5de3515b8fcd9e Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:26:30 +0000
Subject: [PATCH 3/9] Add onError callback to Speech.speak to prevent stuck
 isSpeaking state

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index 90c15c09..e3e40759 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -221,6 +221,7 @@ export function VoiceChat() {
     Speech.speak(cleanText, {
       onDone: () => setIsSpeaking(false),
       onStopped: () => setIsSpeaking(false),
+      onError: () => setIsSpeaking(false),
       rate: 1.0,
     })
   }

From ea6a74171c79feffd260217b8c27f692b1034367 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:35:29 +0000
Subject: [PATCH 4/9] Use deep copy for state updates to avoid mutating React
 state

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index e3e40759..83c46e94 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -181,9 +181,9 @@ export function VoiceChat() {
               localResponse += data.content
             }
           }
-          const updated = [...newMessages]
+          const updated = JSON.parse(JSON.stringify(newMessages))
           updated[updated.length - 1].assistant = localResponse
-          setMessages([...updated])
+          setMessages(updated)
         } else {
           es.close()
           setVoiceState('idle')

From 23743c44ec43c5f1a8f266f54f049900dc4c077a Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:42:00 +0000
Subject: [PATCH 5/9] Handle empty model response to prevent permanent spinner

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index 83c46e94..abcf47af 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -186,6 +186,11 @@ export function VoiceChat() {
           setMessages(updated)
         } else {
           es.close()
+          if (!localResponse) {
+            const updated = JSON.parse(JSON.stringify(newMessages))
+            updated[updated.length - 1].assistant = '(No response received)'
+            setMessages(updated)
+          }
           setVoiceState('idle')
           if (chatTypeStr === 'claude') {
             setApiMessages(prev =>

From f5ca3cc2dd6e4a13bd955a1922b22847872db900 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:46:40 +0000
Subject: [PATCH 6/9] Set fallback assistant text in SSE error handler to
 prevent stuck spinner

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index abcf47af..7885ad5a 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -202,6 +202,9 @@ export function VoiceChat() {
           }
         }
       } else if (event.type === 'error' || event.type === 'exception') {
+        const updated = JSON.parse(JSON.stringify(newMessages))
+        updated[updated.length - 1].assistant = localResponse || '(Error occurred)'
+        setMessages(updated)
         setVoiceState('idle')
         es.close()
       }

From c6d6d709d7826eb876c1a4109bf6a24b7b50e4d6 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 17:52:58 +0000
Subject: [PATCH 7/9] Clean up recording ref and audio mode in stopRecording
 error path

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index 7885ad5a..750410ed 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -124,6 +124,8 @@ export function VoiceChat() {
       }
     } catch (err) {
       console.log('Failed to transcribe:', err)
+      recordingRef.current = null
+      try { await Audio.setAudioModeAsync({ allowsRecordingIOS: false }) } catch (e) {}
       setVoiceState('idle')
     }
   }

From b11857ef09bd41d3eddbbc6081be04da0b31b01a Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 18:04:07 +0000
Subject: [PATCH 8/9] Add guard ref to prevent double-tap race condition;
 remove dead transcription preview code

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index 750410ed..3e343a09 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -27,10 +27,10 @@ type VoiceState = 'idle' | 'recording' | 'transcribing' | 'responding'
 export function VoiceChat() {
   const [messages, setMessages] = useState<VoiceMessage[]>([])
   const [voiceState, setVoiceState] = useState<VoiceState>('idle')
-  const [currentTranscription, setCurrentTranscription] = useState('')
   const [isSpeaking, setIsSpeaking] = useState(false)
   const [apiMessages, setApiMessages] = useState('')
   const recordingRef = useRef<Audio.Recording | null>(null)
+  const isStartingRef = useRef(false)
   const scrollViewRef = useRef<ScrollView | null>(null)
   const pulseAnim = useRef(new Animated.Value(1)).current
 
@@ -62,9 +62,14 @@ export function VoiceChat() {
   }, [voiceState])
 
   async function startRecording() {
+    if (isStartingRef.current) return
+    isStartingRef.current = true
     try {
       const { granted } = await Audio.requestPermissionsAsync()
-      if (!granted) return
+      if (!granted) {
+        isStartingRef.current = false
+        return
+      }
 
       await Audio.setAudioModeAsync({
         allowsRecordingIOS: true,
@@ -78,6 +83,8 @@ export function VoiceChat() {
       setVoiceState('recording')
     } catch (err) {
       console.log('Failed to start recording:', err)
+    } finally {
+      isStartingRef.current = false
     }
   }
 
@@ -117,7 +124,6 @@ export function VoiceChat() {
       const data = await response.json()
 
       if (data.text && data.text.trim()) {
-        setCurrentTranscription(data.text)
         sendToModel(data.text)
       } else {
         setVoiceState('idle')
@@ -136,7 +142,6 @@ export function VoiceChat() {
 
     const newMessages: VoiceMessage[] = [...messages, { user: text }]
     setMessages([...newMessages])
-    setCurrentTranscription('')
 
     setTimeout(() => {
       scrollViewRef.current?.scrollToEnd({ animated: true })
@@ -252,7 +257,6 @@ export function VoiceChat() {
 
   function clearConversation() {
     setMessages([])
-    setCurrentTranscription('')
     setApiMessages('')
     if (isSpeaking) stopSpeaking()
   }
@@ -335,17 +339,7 @@ export function VoiceChat() {
             )}
           </View>
         ))}
-        {currentTranscription ? (
-          <View style={styles.userBubble}>
-            <Ionicons
-              name="mic"
-              size={14}
-              color={theme.tintTextColor}
-              style={styles.micIcon}
-            />
-            <Text style={styles.userText}>{currentTranscription}</Text>
-          </View>
-        ) : null}
+
       </ScrollView>
 
       <View style={styles.controlsContainer}>

From 56a390267bab961686547cd401c0b2d4a8557134 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Wed, 29 Apr 2026 18:12:23 +0000
Subject: [PATCH 9/9] Reset iOS audio mode in startRecording catch block to
 prevent leaked recording state

Co-Authored-By: Nader Dabit <dabit3@gmail.com>
---
 app/src/screens/voice.tsx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/app/src/screens/voice.tsx b/app/src/screens/voice.tsx
index 3e343a09..0acae3ef 100644
--- a/app/src/screens/voice.tsx
+++ b/app/src/screens/voice.tsx
@@ -83,6 +83,7 @@ export function VoiceChat() {
       setVoiceState('recording')
     } catch (err) {
       console.log('Failed to start recording:', err)
+      try { await Audio.setAudioModeAsync({ allowsRecordingIOS: false }) } catch (e) {}
     } finally {
       isStartingRef.current = false
     }