diff --git a/examples/320-vapi-deepgram-voice-agent-ts/.env.example b/examples/320-vapi-deepgram-voice-agent-ts/.env.example new file mode 100644 index 0000000..48841b3 --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/.env.example @@ -0,0 +1,5 @@ +# Deepgram — https://console.deepgram.com/ +DEEPGRAM_API_KEY= + +# Vapi.ai — https://dashboard.vapi.ai/ +VAPI_API_KEY= diff --git a/examples/320-vapi-deepgram-voice-agent-ts/.gitignore b/examples/320-vapi-deepgram-voice-agent-ts/.gitignore new file mode 100644 index 0000000..b0cfafd --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/.gitignore @@ -0,0 +1,4 @@ +node_modules/ +dist/ +.env +package-lock.json diff --git a/examples/320-vapi-deepgram-voice-agent-ts/README.md b/examples/320-vapi-deepgram-voice-agent-ts/README.md new file mode 100644 index 0000000..60e9e8f --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/README.md @@ -0,0 +1,75 @@ +# Vapi.ai Voice Agent with Deepgram STT & TTS + +Build a production-ready voice agent on Vapi.ai using Deepgram as the speech-to-text and text-to-speech provider. This example shows how to configure Deepgram-specific settings for optimal quality and latency, handle server-side function calls via webhooks, and manage the full conversation lifecycle. + +## What you'll build + +A TypeScript server that creates a Vapi voice assistant powered by Deepgram nova-3 (STT) and aura-2 (TTS), with an Express webhook endpoint that handles real-time function calls — in this case, a pizza order status lookup. + +## Prerequisites + +- Node.js 18+ +- Deepgram account — [get a free API key](https://console.deepgram.com/) +- Vapi.ai account — [sign up](https://dashboard.vapi.ai/) +- A tunnel tool for local development (e.g. 
[ngrok](https://ngrok.com/) or [cloudflared](https://developers.cloudflare.com/cloudflare-one/connections/connect-networks/)) + +## Environment variables + +| Variable | Where to find it | +|----------|-----------------| +| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) | +| `VAPI_API_KEY` | [Vapi dashboard → Organization Settings → API Keys](https://dashboard.vapi.ai/) | + +## Install and run + +```bash +cp .env.example .env +# Fill in DEEPGRAM_API_KEY and VAPI_API_KEY + +npm install +npm run build + +# 1. Create the assistant (one-time setup) +npm run create-assistant +# Copy the assistant ID and add VAPI_ASSISTANT_ID to .env + +# 2. Start the webhook server +npm start + +# 3. Expose your server (in another terminal) +ngrok http 3000 + +# 4. Update VAPI_SERVER_URL in .env with your ngrok URL + /webhook +# Then re-create the assistant or update it in the Vapi dashboard + +# 5. Make a test call from the Vapi dashboard or: +curl -X POST http://localhost:3000/call \ + -H "Content-Type: application/json" \ + -d '{"assistantId": "your-assistant-id"}' +``` + +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `transcriber.provider` | `deepgram` | Uses Deepgram for speech-to-text | +| `transcriber.model` | `nova-3` | Latest and most accurate Deepgram STT model | +| `transcriber.endpointing` | `255` | Milliseconds of silence before end-of-turn detection | +| `voice.provider` | `deepgram` | Uses Deepgram for text-to-speech | +| `voice.voiceId` | `aura-2-thalia-en` | Natural-sounding female English voice | +| `model.provider` | `openai` | LLM provider for generating responses | +| `model.model` | `gpt-4o-mini` | Fast, cost-effective model for conversational AI | + +## How it works + +1. 
**`create-assistant.ts`** provisions a Vapi assistant via the server SDK, configuring Deepgram as both the STT transcriber (nova-3) and TTS voice (aura-2-thalia-en), plus an OpenAI LLM with a system prompt and function definitions. + +2. **`index.ts`** runs an Express server with a `/webhook` endpoint. When a call connects, Vapi streams audio to Deepgram for transcription, feeds the text to the LLM, and synthesises responses with Deepgram TTS — all managed by Vapi's infrastructure. + +3. When the LLM decides to call a function (e.g. `check_order_status`), Vapi sends a `function-call` event to your webhook. Your server executes the logic and returns the result, which the LLM uses to continue the conversation. + +4. At the end of the call, Vapi sends an `end-of-call-report` with duration, cost, and a full transcript. + +## Starter templates + +[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) diff --git a/examples/320-vapi-deepgram-voice-agent-ts/package.json b/examples/320-vapi-deepgram-voice-agent-ts/package.json new file mode 100644 index 0000000..c770609 --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/package.json @@ -0,0 +1,27 @@ +{ + "name": "deepgram-vapi-voice-agent-ts", + "version": "1.0.0", + "description": "Vapi.ai voice agent using Deepgram for STT and TTS with server-side webhook handling", + "main": "dist/index.js", + "scripts": { + "build": "tsc", + "start": "node dist/index.js", + "create-assistant": "node dist/create-assistant.js", + "dev": "tsc && node dist/index.js", + "test": "node tests/test.js" + }, + "dependencies": { + "@deepgram/sdk": "5.0.0", + "@vapi-ai/server-sdk": "^0.11.0", + "dotenv": "^16.4.0", + "express": "^4.21.0" + }, + "devDependencies": { + "@types/express": "^5.0.0", + "@types/node": "^22.0.0", + "typescript": "^5.7.0" + }, + "engines": { + "node": ">=18" + } +} diff --git a/examples/320-vapi-deepgram-voice-agent-ts/src/create-assistant.ts 
b/examples/320-vapi-deepgram-voice-agent-ts/src/create-assistant.ts new file mode 100644 index 0000000..c1643a2 --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/src/create-assistant.ts @@ -0,0 +1,111 @@ +import 'dotenv/config'; +import { VapiClient } from '@vapi-ai/server-sdk'; + +// Creates a Vapi assistant configured with Deepgram for both STT and TTS. +// Run once to provision the assistant, then use the returned ID for calls. + +if (!process.env.VAPI_API_KEY) { + console.error('Error: VAPI_API_KEY environment variable is not set.'); + process.exit(1); +} + +const vapi = new VapiClient({ token: process.env.VAPI_API_KEY }); + +// Webhook URL where Vapi sends function-call and status events. +// In development, use a tunnel (ngrok, cloudflared) pointing to your local server. +const SERVER_URL = process.env.VAPI_SERVER_URL || 'https://your-server.example.com/webhook'; + +async function main() { + const assistant = await vapi.assistants.create({ + name: 'Deepgram Pizza Assistant', + + // Greeting spoken when a call connects + firstMessage: 'Thanks for calling Deepgram Pizza! How can I help you today?', + + // ── Deepgram STT (transcriber) ──────────────────────────────────────── + // nova-3 is the latest and most accurate general-purpose model. + // language defaults to multi (automatic language detection) if omitted. + transcriber: { + provider: 'deepgram', + model: 'nova-3', + language: 'en', + smartFormat: true, + // ← endpointing controls how quickly the agent detects the user stopped talking + // Lower values = faster response but may clip mid-sentence pauses + endpointing: 255, + }, + + // ── Deepgram TTS (voice) ────────────────────────────────────────────── + // aura-2 is the latest Deepgram TTS model family with natural-sounding voices. + // See https://developers.deepgram.com/docs/tts-models for available voice IDs. 
+ voice: { + provider: 'deepgram', + voiceId: 'aura-2-thalia-en', + }, + + // ── LLM (the "brain" that generates responses) ──────────────────────── + model: { + provider: 'openai', + model: 'gpt-4o-mini', + messages: [ + { + role: 'system', + content: + 'You are a friendly phone assistant for a pizza shop called "Deepgram Pizza". ' + + 'You help customers check their order status. Keep responses concise — the caller is on the phone. ' + + 'When a customer asks about their order, use the check_order_status function to look it up.', + }, + ], + // ← THIS enables function calling: the LLM can invoke server-side tools + tools: [ + { + type: 'function', + function: { + name: 'check_order_status', + description: 'Look up the current status of a pizza order by order number', + parameters: { + type: 'object', + properties: { + order_number: { + type: 'string', + description: 'The order number to look up, e.g. "1001"', + }, + }, + required: ['order_number'], + }, + }, + }, + ], + }, + + // ── Server (webhook) configuration ────────────────────────────────── + server: { + url: SERVER_URL, + }, + serverMessages: [ + 'function-call', + 'status-update', + 'end-of-call-report', + 'conversation-update', + ], + + maxDurationSeconds: 600, + }); + + console.log('Assistant created successfully!'); + console.log(` ID: ${assistant.id}`); + console.log(` Name: ${assistant.name}`); + console.log(` STT: Deepgram nova-3`); + console.log(` TTS: Deepgram thalia (aura-2)`); + console.log(''); + console.log('Next steps:'); + console.log(` 1. Set VAPI_ASSISTANT_ID=${assistant.id} in your .env`); + console.log(' 2. Start the webhook server: npm run dev'); + console.log(' 3. Expose your server (ngrok/cloudflared) and update VAPI_SERVER_URL'); + console.log(' 4. 
Make a test call from the Vapi dashboard or POST /call'); +} + +main().catch((err) => { + console.error('Error creating assistant:', err.message || err); + process.exit(1); +}); diff --git a/examples/320-vapi-deepgram-voice-agent-ts/src/index.ts b/examples/320-vapi-deepgram-voice-agent-ts/src/index.ts new file mode 100644 index 0000000..7967bc5 --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/src/index.ts @@ -0,0 +1,115 @@ +import 'dotenv/config'; +import express, { Request, Response } from 'express'; +import { VapiClient } from '@vapi-ai/server-sdk'; + +const PORT = parseInt(process.env.PORT || '3000', 10); + +if (!process.env.VAPI_API_KEY) { + console.error('Error: VAPI_API_KEY environment variable is not set.'); + console.error('Copy .env.example to .env and add your keys.'); + process.exit(1); +} + +const vapi = new VapiClient({ token: process.env.VAPI_API_KEY }); + +// Simulated order database — replace with your real data source +const ORDER_DB: Record<string, { status: string; eta: string; items: string[] }> = { + '1001': { status: 'out_for_delivery', eta: '15 minutes', items: ['Large pepperoni pizza', 'Garlic bread'] }, + '1002': { status: 'preparing', eta: '30 minutes', items: ['Margherita pizza', 'Caesar salad'] }, + '1003': { status: 'delivered', eta: 'Already delivered', items: ['Hawaiian pizza'] }, +}; + +function handleToolCall(name: string, args: Record<string, unknown>): string { + if (name === 'check_order_status') { + const orderNumber = String(args.order_number || ''); + const order = ORDER_DB[orderNumber]; + if (!order) { + return JSON.stringify({ error: `Order ${orderNumber} not found` }); + } + return JSON.stringify({ order_number: orderNumber, ...order }); + } + return JSON.stringify({ error: `Unknown function: ${name}` }); +} + +export function createApp() { + const app = express(); + app.use(express.json()); + + // POST /webhook — Vapi sends server events here (function calls, status updates, end-of-call) + app.post('/webhook', (req: Request, res: Response) => { + const event = req.body; + const type 
= event?.message?.type; + + console.log(`[webhook] ${type || 'unknown'}`); + + switch (type) { + case 'function-call': { + // ← THIS enables tool use: Vapi asks us to execute a function the LLM invoked + const fnCall = event.message.functionCall; + console.log(`[function] ${fnCall.name}(${JSON.stringify(fnCall.parameters)})`); + const result = handleToolCall(fnCall.name, fnCall.parameters || {}); + console.log(`[function] result: ${result}`); + res.json({ result }); + return; + } + + case 'status-update': + console.log(`[status] ${event.message.status} — ${event.message.endedReason || ''}`); + break; + + case 'end-of-call-report': + console.log(`[report] Duration: ${event.message.durationSeconds}s, Cost: $${event.message.cost}`); + if (event.message.transcript) { + console.log(`[report] Transcript: ${event.message.transcript.substring(0, 200)}...`); + } + break; + + case 'conversation-update': + break; + + case 'speech-update': + break; + + default: + break; + } + + res.status(200).send(); + }); + + // GET /health — quick check that the server is running + app.get('/health', (_req: Request, res: Response) => { + res.json({ status: 'ok', service: 'vapi-deepgram-voice-agent' }); + }); + + // POST /call — programmatically start a Vapi web call using the assistant + app.post('/call', async (req: Request, res: Response) => { + const assistantId = req.body?.assistantId || process.env.VAPI_ASSISTANT_ID; + if (!assistantId) { + res.status(400).json({ error: 'Missing assistantId in body or VAPI_ASSISTANT_ID env' }); + return; + } + + try { + const call = await vapi.calls.create({ assistantId }) as unknown as { id: string; status: string }; + console.log(`[call] Created: ${call.id}`); + res.json({ callId: call.id, status: call.status }); + } catch (err: unknown) { + const msg = err instanceof Error ? 
err.message : String(err); + console.error(`[call] Error: ${msg}`); + res.status(500).json({ error: msg }); + } + }); + + return app; +} + +if (require.main === module) { + const app = createApp(); + app.listen(PORT, () => { + console.log(`Server listening on port ${PORT}`); + console.log(` POST /webhook — Vapi server events (set this as your Server URL in the assistant)`); + console.log(` POST /call — Start a web call`); + console.log(` GET /health — Health check`); + }); +} diff --git a/examples/320-vapi-deepgram-voice-agent-ts/tests/test.js b/examples/320-vapi-deepgram-voice-agent-ts/tests/test.js new file mode 100644 index 0000000..435a996 --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/tests/test.js @@ -0,0 +1,93 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); + +// ── Credential check — MUST be first ────────────────────────────────────── +const envExample = path.join(__dirname, '..', '.env.example'); +const required = fs.readFileSync(envExample, 'utf8') + .split('\n') + .filter(l => /^[A-Z][A-Z0-9_]+=/.test(l.trim())) + .map(l => l.split('=')[0].trim()); + +const missing = required.filter(k => !process.env[k]); +if (missing.length > 0) { + console.error(`MISSING_CREDENTIALS: ${missing.join(',')}`); + process.exit(2); +} +// ────────────────────────────────────────────────────────────────────────── + +const { VapiClient } = require('@vapi-ai/server-sdk'); +const { DeepgramClient } = require('@deepgram/sdk'); + +async function run() { + // ── Test 1: Verify Deepgram API key works ───────────────────────────── + console.log('Test 1: Verifying Deepgram API key...'); + const deepgram = new DeepgramClient({ apiKey: process.env.DEEPGRAM_API_KEY }); + const data = await deepgram.listen.v1.media.transcribeUrl({ + url: 'https://dpgr.am/spacewalk.wav', + model: 'nova-3', + tag: 'deepgram-examples', + }); + + const transcript = data?.results?.channels?.[0]?.alternatives?.[0]?.transcript; + if (!transcript || transcript.length 
< 20) { + throw new Error(`Deepgram transcript too short or empty: "${transcript}"`); + } + console.log(` ✓ Deepgram STT working (${transcript.length} chars)`); + + // ── Test 2: Verify Vapi API key works by listing assistants ─────────── + console.log('Test 2: Verifying Vapi API key...'); + const vapi = new VapiClient({ token: process.env.VAPI_API_KEY }); + const assistants = await vapi.assistants.list(); + + if (!Array.isArray(assistants)) { + throw new Error('Expected assistants.list() to return an array'); + } + console.log(` ✓ Vapi API accessible (${assistants.length} assistant(s) found)`); + + // ── Test 3: Create a transient assistant with Deepgram STT/TTS ──────── + console.log('Test 3: Creating test assistant with Deepgram provider...'); + const assistant = await vapi.assistants.create({ + name: 'deepgram-examples-test-' + Date.now(), + firstMessage: 'Hello, this is a test.', + transcriber: { + provider: 'deepgram', + model: 'nova-3', + language: 'en', + }, + voice: { + provider: 'deepgram', + voiceId: 'aura-2-thalia-en', + }, + model: { + provider: 'openai', + model: 'gpt-4o-mini', + messages: [{ role: 'system', content: 'You are a test assistant.' 
}], + }, + maxDurationSeconds: 60, + }); + + if (!assistant.id) { + throw new Error('Assistant creation did not return an ID'); + } + console.log(` ✓ Assistant created: ${assistant.id}`); + + // ── Cleanup: delete the test assistant ──────────────────────────────── + try { + await vapi.assistants.delete(assistant.id); + console.log(` ✓ Test assistant deleted`); + } catch (cleanupErr) { + console.warn(` ⚠ Could not delete test assistant: ${cleanupErr.message}`); + } +} + +run() + .then(() => { + console.log('\n✓ All tests passed'); + process.exit(0); + }) + .catch(err => { + console.error(`\n✗ Test failed: ${err.message}`); + process.exit(1); + }); diff --git a/examples/320-vapi-deepgram-voice-agent-ts/tsconfig.json b/examples/320-vapi-deepgram-voice-agent-ts/tsconfig.json new file mode 100644 index 0000000..d6e8f63 --- /dev/null +++ b/examples/320-vapi-deepgram-voice-agent-ts/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "commonjs", + "outDir": "dist", + "rootDir": "src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "resolveJsonModule": true + }, + "include": ["src/**/*"] +}