diff --git a/app/package.json b/app/package.json index 9fd65c86..87189261 100644 --- a/app/package.json +++ b/app/package.json @@ -19,11 +19,13 @@ "@react-navigation/native-stack": "^7.3.10", "@ronradtke/react-native-markdown-display": "^8.0.0", "expo": "^54.0.0", + "expo-av": "^16.0.8", "expo-clipboard": "~8.0.8", "expo-document-picker": "~14.0.8", "expo-file-system": "~19.0.21", "expo-font": "~14.0.11", "expo-image-picker": "~17.0.10", + "expo-speech": "^55.0.13", "expo-splash-screen": "~31.0.13", "expo-status-bar": "~3.0.9", "punycode": "^2.3.1", diff --git a/app/pnpm-lock.yaml b/app/pnpm-lock.yaml index 44516e7e..cc4fea4b 100644 --- a/app/pnpm-lock.yaml +++ b/app/pnpm-lock.yaml @@ -38,6 +38,9 @@ importers: expo: specifier: ^54.0.0 version: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0) + expo-av: + specifier: ^16.0.8 + version: 16.0.8(expo@54.0.32)(react-native-web@0.20.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0) expo-clipboard: specifier: ~8.0.8 version: 8.0.8(expo@54.0.32)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0) @@ -53,6 +56,9 @@ importers: expo-image-picker: specifier: ~17.0.10 version: 17.0.10(expo@54.0.32) + expo-speech: + specifier: ^55.0.13 + version: 55.0.13(expo@54.0.32) expo-splash-screen: specifier: ~31.0.13 version: 31.0.13(expo@54.0.32) @@ -1557,6 +1563,17 @@ packages: react: '*' react-native: '*' + expo-av@16.0.8: + resolution: {integrity: sha512-cmVPftGR/ca7XBgs7R6ky36lF3OC0/MM/lpgX/yXqfv0jASTsh7AYX9JxHCwFmF+Z6JEB1vne9FDx4GiLcGreQ==} + peerDependencies: + expo: '*' + react: '*' + react-native: '*' + react-native-web: '*' + peerDependenciesMeta: + react-native-web: + optional: true + expo-clipboard@8.0.8: resolution: {integrity: sha512-VKoBkHIpZZDJTB0jRO4/PZskHdMNOEz3P/41tmM6fDuODMpqhvyWK053X0ebspkxiawJX9lX33JXHBCvVsTTOA==} peerDependencies: @@ -1618,6 +1635,11 @@ packages: resolution: {integrity: sha512-IGR++flYH70rhLyeXF0Phle56/k4cee87WeQ4mamS+MkVAVP+dDlOHf2nN06Z9Y2KhU0Gp1k+y61KkghF7HdhA==} engines: {node: '>=20.16.0'} + expo-speech@55.0.13: + resolution: {integrity: sha512-DZzwe9Z6NhHmNi5ICwYE9BgcNmE3U8t0pSlg1vs18f6omyD88e1ZqCJ0as1aWgZYO3WtbcwWqCTboV8QMwtOow==} + peerDependencies: + expo: '*' + expo-splash-screen@31.0.13: resolution: {integrity: sha512-1epJLC1cDlwwj089R2h8cxaU5uk4ONVAC+vzGiTZH4YARQhL4Stlz1MbR6yAS173GMosvkE6CAeihR7oIbCkDA==} peerDependencies: @@ -1974,28 +1996,24 @@ packages: engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] - libc: [glibc] lightningcss-linux-arm64-musl@1.31.1: resolution: {integrity: sha512-mVZ7Pg2zIbe3XlNbZJdjs86YViQFoJSpc41CbVmKBPiGmC4YrfeOyz65ms2qpAobVd7WQsbW4PdsSJEMymyIMg==} engines: {node: '>= 12.0.0'} cpu: [arm64] os: [linux] - libc: [musl] lightningcss-linux-x64-gnu@1.31.1: resolution: {integrity: sha512-xGlFWRMl+0KvUhgySdIaReQdB4FNudfUTARn7q0hh/V67PVGCs3ADFjw+6++kG1RNd0zdGRlEKa+T13/tQjPMA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] - libc: [glibc] lightningcss-linux-x64-musl@1.31.1: resolution: {integrity: sha512-eowF8PrKHw9LpoZii5tdZwnBcYDxRw2rRCyvAXLi34iyeYfqCQNA9rmUM0ce62NlPhCvof1+9ivRaTY6pSKDaA==} engines: {node: '>= 12.0.0'} cpu: [x64] os: [linux] - libc: [musl] lightningcss-win32-arm64-msvc@1.31.1: resolution: {integrity: sha512-aJReEbSEQzx1uBlQizAOBSjcmr9dCdL3XuC/6HLXAxmtErsj2ICo5yYggg1qOODQMtnjNQv2UHb9NpOuFtYe4w==} @@ -4903,6 +4921,14 @@ snapshots: transitivePeerDependencies: - supports-color + expo-av@16.0.8(expo@54.0.32)(react-native-web@0.20.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0))(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0): + dependencies: + expo: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0) + react: 19.1.0 + react-native: 0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0) + optionalDependencies: + react-native-web: 0.20.0(react-dom@19.1.0(react@19.1.0))(react@19.1.0) + expo-clipboard@8.0.8(expo@54.0.32)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0): dependencies: expo: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0) @@ -4964,6 +4990,10 @@ snapshots: expo-server@1.0.5: {} + expo-speech@55.0.13(expo@54.0.32): + dependencies: + expo: 54.0.32(@babel/core@7.28.6)(@expo/metro-runtime@6.1.2)(react-native@0.81.5(@babel/core@7.28.6)(@types/react@19.1.17)(react@19.1.0))(react@19.1.0) + expo-splash-screen@31.0.13(expo@54.0.32): dependencies: '@expo/prebuild-config': 54.0.8(expo@54.0.32) diff --git a/app/src/main.tsx b/app/src/main.tsx index 78f382d3..685de076 100644 --- a/app/src/main.tsx +++ b/app/src/main.tsx @@ -1,7 +1,7 @@ import { useContext, useRef, useCallback } from 'react'; import { StyleSheet, Text, View, Button } from 'react-native'; import { createBottomTabNavigator } from '@react-navigation/bottom-tabs'; -import { Chat, Images, Settings } from './screens' +import { Chat, Images, VoiceChat, Settings } from './screens' import { Header } from './components' import FeatherIcon from '@expo/vector-icons/Feather' import { @@ -43,6 +43,20 @@ function MainComponent() { ), }} /> +
, + tabBarIcon: ({ color, size }) => ( + + ), + }} + /> ([]) + const [voiceState, setVoiceState] = useState('idle') + const [isSpeaking, setIsSpeaking] = useState(false) + const [apiMessages, setApiMessages] = useState('') + const recordingRef = useRef(null) + const isStartingRef = useRef(false) + const scrollViewRef = useRef(null) + const pulseAnim = useRef(new Animated.Value(1)).current + + const { theme } = useContext(ThemeContext) + const { chatType } = useContext(AppContext) + const styles = getStyles(theme) + + useEffect(() => { + if (voiceState === 'recording') { + const pulse = Animated.loop( + Animated.sequence([ + Animated.timing(pulseAnim, { + toValue: 1.2, + duration: 800, + useNativeDriver: true, + }), + Animated.timing(pulseAnim, { + toValue: 1, + duration: 800, + useNativeDriver: true, + }), + ]) + ) + pulse.start() + return () => pulse.stop() + } else { + pulseAnim.setValue(1) + } + }, [voiceState]) + + async function startRecording() { + if (isStartingRef.current) return + isStartingRef.current = true + try { + const { granted } = await Audio.requestPermissionsAsync() + if (!granted) { + isStartingRef.current = false + return + } + + await Audio.setAudioModeAsync({ + allowsRecordingIOS: true, + playsInSilentModeIOS: true, + }) + + const { recording } = await Audio.Recording.createAsync( + Audio.RecordingOptionsPresets.HIGH_QUALITY + ) + recordingRef.current = recording + setVoiceState('recording') + } catch (err) { + console.log('Failed to start recording:', err) + try { await Audio.setAudioModeAsync({ allowsRecordingIOS: false }) } catch (e) {} + } finally { + isStartingRef.current = false + } + } + + async function stopRecording() { + if (!recordingRef.current) return + + setVoiceState('transcribing') + try { + await recordingRef.current.stopAndUnloadAsync() + await Audio.setAudioModeAsync({ + allowsRecordingIOS: false, + }) + + const uri = recordingRef.current.getURI() + recordingRef.current = null + + if (!uri) { + setVoiceState('idle') + return + } + + const formData = new FormData() + formData.append('audio', { + uri, + type: 'audio/m4a', + name: 'recording.m4a', + } as any) + + const response = await fetch(`${DOMAIN}/chat/transcribe`, { + method: 'POST', + body: formData, + headers: { + 'Content-Type': 'multipart/form-data', + }, + }) + + const data = await response.json() + + if (data.text && data.text.trim()) { + sendToModel(data.text) + } else { + setVoiceState('idle') + } + } catch (err) { + console.log('Failed to transcribe:', err) + recordingRef.current = null + try { await Audio.setAudioModeAsync({ allowsRecordingIOS: false }) } catch (e) {} + setVoiceState('idle') + } + } + + async function sendToModel(text: string) { + setVoiceState('responding') + let localResponse = '' + + const newMessages: VoiceMessage[] = [...messages, { user: text }] + setMessages([...newMessages]) + + setTimeout(() => { + scrollViewRef.current?.scrollToEnd({ animated: true }) + }, 100) + + const chatTypeStr = getChatType(chatType) + + let body: any + if (chatTypeStr === 'claude') { + const claudePrompt = `${apiMessages}\n\nHuman: ${text}\n\nAssistant:` + body = { prompt: claudePrompt, model: chatType.label } + } else if (chatTypeStr === 'gemini') { + body = { prompt: text, model: chatType.label } + } else { + const gptMessages = newMessages.reduce((acc: any[], msg) => { + acc.push({ role: 'user', content: msg.user }) + if (msg.assistant) { + acc.push({ role: 'assistant', content: msg.assistant }) + } + return acc + }, []) + body = { messages: gptMessages, model: chatType.label } + } + + const es = await getEventSource({ body, type: chatTypeStr }) + + const listener = (event: any) => { + if (event.type === 'open') { + // connection opened + } else if (event.type === 'message') { + if (event.data !== '[DONE]') { + if (localResponse.length < 850) { + scrollViewRef.current?.scrollToEnd({ animated: true }) + } + const data = JSON.parse(event.data) + if (chatTypeStr === 'claude') { + if (data.text) localResponse += data.text + } else if (chatTypeStr === 'gemini') { + localResponse += typeof data === 'string' ? data : '' + } else { + if (typeof data === 'string') { + localResponse += data + } else if (data?.content) { + localResponse += data.content + } + } + const updated = JSON.parse(JSON.stringify(newMessages)) + updated[updated.length - 1].assistant = localResponse + setMessages(updated) + } else { + es.close() + if (!localResponse) { + const updated = JSON.parse(JSON.stringify(newMessages)) + updated[updated.length - 1].assistant = '(No response received)' + setMessages(updated) + } + setVoiceState('idle') + if (chatTypeStr === 'claude') { + setApiMessages(prev => + `${prev}\n\nHuman: ${text}\n\nAssistant:${getFirstNCharsOrLess(localResponse, 2000)}` + ) + } + if (localResponse) { + speakResponse(localResponse) + } + } + } else if (event.type === 'error' || event.type === 'exception') { + const updated = JSON.parse(JSON.stringify(newMessages)) + updated[updated.length - 1].assistant = localResponse || '(Error occurred)' + setMessages(updated) + setVoiceState('idle') + es.close() + } + } + + es.addEventListener('open', listener) + es.addEventListener('message', listener) + es.addEventListener('error', listener) + } + + function speakResponse(text: string) { + const cleanText = text + .replace(/```[\s\S]*?```/g, ' code block ') + .replace(/`[^`]*`/g, '') + .replace(/[#*_~\[\]]/g, '') + .replace(/\n+/g, '. ') + .trim() + + if (!cleanText) return + + setIsSpeaking(true) + Speech.speak(cleanText, { + onDone: () => setIsSpeaking(false), + onStopped: () => setIsSpeaking(false), + onError: () => setIsSpeaking(false), + rate: 1.0, + }) + } + + function stopSpeaking() { + Speech.stop() + setIsSpeaking(false) + } + + function handleMicPress() { + if (voiceState === 'idle') { + if (isSpeaking) stopSpeaking() + startRecording() + } else if (voiceState === 'recording') { + stopRecording() + } + } + + function clearConversation() { + setMessages([]) + setApiMessages('') + if (isSpeaking) stopSpeaking() + } + + const getStatusText = () => { + switch (voiceState) { + case 'recording': return 'Listening...' + case 'transcribing': return 'Transcribing...' + case 'responding': return 'Thinking...' + default: return isSpeaking ? 'Speaking...' : 'Tap to speak' + } + } + + const getMicColor = () => { + if (voiceState === 'recording') return '#ff4444' + if (voiceState === 'transcribing' || voiceState === 'responding') return theme.mutedForegroundColor + return theme.tintColor + } + + return ( + + scrollViewRef.current?.scrollToEnd({ animated: true })} + > + {messages.length === 0 && voiceState === 'idle' && ( + + + Voice Chat + + Tap the microphone to start speaking.{'\n'}Your speech will be transcribed and sent to {chatType.name}. + + + )} + {messages.map((msg, i) => ( + + + + {msg.user} + + {msg.assistant ? ( + + + {msg.assistant} + + + ) : ( + + + + )} + + ))} + + + + + {getStatusText()} + + {messages.length > 0 && voiceState === 'idle' && ( + + + + )} + + + + + + {isSpeaking && voiceState === 'idle' && ( + + + + )} + + + + ) +} + +function getStyles(theme: any) { + return StyleSheet.create({ + container: { + flex: 1, + backgroundColor: theme.backgroundColor, + }, + messagesContainer: { + flex: 1, + }, + messagesContent: { + padding: 16, + paddingBottom: 8, + }, + emptyState: { + alignItems: 'center', + justifyContent: 'center', + paddingTop: 80, + paddingHorizontal: 40, + }, + emptyStateTitle: { + fontSize: 22, + fontFamily: theme.semiBoldFont, + color: theme.textColor, + marginTop: 16, + }, + emptyStateSubtext: { + fontSize: 14, + fontFamily: theme.regularFont, + color: theme.mutedForegroundColor, + textAlign: 'center', + marginTop: 8, + lineHeight: 20, + }, + userBubble: { + backgroundColor: theme.tintColor, + borderRadius: 16, + borderBottomRightRadius: 4, + padding: 12, + paddingHorizontal: 14, + marginBottom: 8, + alignSelf: 'flex-end', + maxWidth: '80%', + flexDirection: 'row', + alignItems: 'center', + }, + micIcon: { + marginRight: 6, + }, + userText: { + color: theme.tintTextColor, + fontSize: 15, + fontFamily: theme.regularFont, + flex: 1, + }, + assistantBubble: { + backgroundColor: theme.borderColor, + borderRadius: 16, + borderBottomLeftRadius: 4, + padding: 12, + paddingHorizontal: 14, + marginBottom: 16, + alignSelf: 'flex-start', + maxWidth: '85%', + }, + controlsContainer: { + alignItems: 'center', + paddingVertical: 20, + paddingBottom: 10, + borderTopWidth: 1, + borderTopColor: theme.borderColor, + }, + statusText: { + fontSize: 14, + fontFamily: theme.mediumFont, + color: theme.mutedForegroundColor, + marginBottom: 16, + }, + buttonRow: { + flexDirection: 'row', + alignItems: 'center', + justifyContent: 'center', + gap: 20, + }, + micButton: { + width: 72, + height: 72, + borderRadius: 36, + justifyContent: 'center', + alignItems: 'center', + shadowColor: '#000', + shadowOffset: { width: 0, height: 2 }, + shadowOpacity: 0.25, + shadowRadius: 4, + elevation: 5, + }, + micButtonDisabled: { + opacity: 0.5, + }, + secondaryButton: { + width: 44, + height: 44, + borderRadius: 22, + borderWidth: 1, + borderColor: theme.borderColor, + justifyContent: 'center', + alignItems: 'center', + }, + }) +} diff --git a/server/src/chat/chatRouter.ts b/server/src/chat/chatRouter.ts index 6bb52b61..cfb291a0 100644 --- a/server/src/chat/chatRouter.ts +++ b/server/src/chat/chatRouter.ts @@ -1,12 +1,16 @@ import express from 'express' +import multer from 'multer' import { claude } from './claude' import { gpt } from './gpt' import { gemini } from './gemini' +import { transcribe } from './transcribe' const router = express.Router() +const upload = multer({ dest: 'uploads/' }) router.post('/claude', claude) router.post('/gpt', gpt) router.post('/gemini', gemini) +router.post('/transcribe', upload.single('audio'), transcribe) export default router diff --git a/server/src/chat/transcribe.ts b/server/src/chat/transcribe.ts new file mode 100644 index 00000000..4842effa --- /dev/null +++ b/server/src/chat/transcribe.ts @@ -0,0 +1,40 @@ +import { Request, Response } from "express" +import asyncHandler from 'express-async-handler' +import fs from 'fs' + +export const transcribe = asyncHandler(async (req: Request, res: Response) => { + try { + if (!req.file) { + res.status(400).json({ error: 'No audio file provided' }) + return + } + + const fileBuffer = fs.readFileSync(req.file.path) + const blob = new Blob([fileBuffer], { type: req.file.mimetype || 'audio/m4a' }) + const formData = new FormData() + formData.append('file', blob, req.file.originalname || 'audio.m4a') + formData.append('model', 'whisper-1') + + const response = await fetch('https://api.openai.com/v1/audio/transcriptions', { + method: 'POST', + headers: { + 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` + }, + body: formData + }) + + const data = await response.json() as { text?: string } + + try { fs.unlinkSync(req.file.path) } catch (e) {} + + if (data.text) { + res.json({ text: data.text }) + } else { + res.status(500).json({ error: 'No transcription returned' }) + } + } catch (err) { + console.log('error in transcription: ', err) + if (req.file?.path) { try { fs.unlinkSync(req.file.path) } catch (e) {} } + res.status(500).json({ error: 'Transcription failed' }) + } +})