From 03ef41c5bb5eb5cf214f4404393b8c89cfc50181 Mon Sep 17 00:00:00 2001 From: examples-bot Date: Fri, 3 Apr 2026 08:55:48 +0000 Subject: [PATCH] =?UTF-8?q?feat(examples):=20add=20380=20=E2=80=94=20Bandw?= =?UTF-8?q?idth=20real-time=20call=20transcription=20(Node.js)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../.env.example | 9 + .../README.md | 78 +++++ .../package.json | 21 ++ .../src/index.js | 211 +++++++++++++ .../tests/test.js | 276 ++++++++++++++++++ 5 files changed, 595 insertions(+) create mode 100644 examples/380-bandwidth-call-transcription-node/.env.example create mode 100644 examples/380-bandwidth-call-transcription-node/README.md create mode 100644 examples/380-bandwidth-call-transcription-node/package.json create mode 100644 examples/380-bandwidth-call-transcription-node/src/index.js create mode 100644 examples/380-bandwidth-call-transcription-node/tests/test.js diff --git a/examples/380-bandwidth-call-transcription-node/.env.example b/examples/380-bandwidth-call-transcription-node/.env.example new file mode 100644 index 0000000..e374899 --- /dev/null +++ b/examples/380-bandwidth-call-transcription-node/.env.example @@ -0,0 +1,9 @@ +# Deepgram — https://console.deepgram.com/ +DEEPGRAM_API_KEY= + +# Bandwidth — https://dashboard.bandwidth.com/ +BW_ACCOUNT_ID= +BW_USERNAME= +BW_PASSWORD= +BW_VOICE_APPLICATION_ID= +BW_NUMBER= diff --git a/examples/380-bandwidth-call-transcription-node/README.md b/examples/380-bandwidth-call-transcription-node/README.md new file mode 100644 index 0000000..5e66ed7 --- /dev/null +++ b/examples/380-bandwidth-call-transcription-node/README.md @@ -0,0 +1,78 @@ +# Bandwidth Real-Time Call Transcription + +Transcribe live phone calls in real-time by connecting Bandwidth's media streaming to Deepgram's streaming speech-to-text API. 
Bandwidth is a US carrier-grade CPaaS used by enterprises for programmable voice, messaging, and 911. Every word spoken on a call is transcribed within milliseconds and printed to the console. + +## What you'll build + +A Node.js server that answers inbound Bandwidth voice calls using BXML, opens a WebSocket media stream via the `<StartStream>` verb, receives raw audio frames from Bandwidth, forwards them to Deepgram for live transcription, and logs both interim and final transcripts to the console. + +## Prerequisites + +- Node.js 18+ +- Deepgram account — [get a free API key](https://console.deepgram.com/) +- Bandwidth account — [sign up](https://www.bandwidth.com/) +- A Bandwidth Voice application with a phone number +- A public URL for your server (use [ngrok](https://ngrok.com/) for local development) + +## Environment variables + +| Variable | Where to find it | +|----------|-----------------| +| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) | +| `BW_ACCOUNT_ID` | [Bandwidth dashboard](https://dashboard.bandwidth.com/) → Account | +| `BW_USERNAME` | [Bandwidth dashboard](https://dashboard.bandwidth.com/) → Account → API Credentials | +| `BW_PASSWORD` | [Bandwidth dashboard](https://dashboard.bandwidth.com/) → Account → API Credentials | +| `BW_VOICE_APPLICATION_ID` | [Bandwidth dashboard](https://dashboard.bandwidth.com/) → Applications | +| `BW_NUMBER` | [Bandwidth dashboard](https://dashboard.bandwidth.com/) → Phone Numbers | + +Copy `.env.example` to `.env` and fill in your values. + +## Install and run + +```bash +npm install +npm start +``` + +Then expose the server publicly (for local dev): + +```bash +ngrok http 3000 +``` + +Configure your Bandwidth Voice application's callback URLs: +- **Answer URL:** `https://<your-domain>/webhooks/answer` (POST) +- **Status URL:** `https://<your-domain>/webhooks/status` (POST) + +Call your Bandwidth number — you'll see live transcripts in the console. 
+ +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `model` | `nova-3` | Deepgram's latest and most accurate speech model | +| `encoding` | `mulaw` | Matches Bandwidth's PCMU telephony audio format | +| `sample_rate` | `8000` | Standard telephony sample rate (8 kHz) | +| `smart_format` | `true` | Adds punctuation, capitalization, and number formatting | +| `interim_results` | `true` | Returns partial transcripts while the speaker is still talking | +| `utterance_end_ms` | `1000` | Fires an UtteranceEnd event after 1 second of silence | + +## How it works + +1. An incoming call hits the `/webhooks/answer` POST endpoint, which returns BXML with `<SpeakSentence>` (greeting) and `<StartStream>` pointing to the `/stream` WebSocket +2. Bandwidth opens a WebSocket to `/stream` and sends a `start` event with stream metadata (call ID, audio format) +3. Bandwidth streams the call audio as JSON messages with `eventType: "media"` containing base64-encoded PCMU audio +4. The server decodes each audio chunk and forwards the raw bytes to a Deepgram live transcription WebSocket +5. Deepgram returns interim and final transcript events, which the server logs to the console +6. When the call ends, Bandwidth sends a `stop` event and both WebSockets close cleanly + +## Related + +- [Deepgram live STT docs](https://developers.deepgram.com/docs/getting-started-with-live-streaming-audio) +- [Bandwidth Voice API docs](https://dev.bandwidth.com/apis/voice/) +- [Bandwidth BXML reference](https://dev.bandwidth.com/docs/voice/bxml/) +- [Bandwidth media streaming](https://dev.bandwidth.com/docs/voice/guides/mediaStreaming/) + +## Starter templates + +If you want a ready-to-run base for your own project, check the [deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) org — there are starter repos for every language and every Deepgram product. 
diff --git a/examples/380-bandwidth-call-transcription-node/package.json b/examples/380-bandwidth-call-transcription-node/package.json new file mode 100644 index 0000000..a080091 --- /dev/null +++ b/examples/380-bandwidth-call-transcription-node/package.json @@ -0,0 +1,21 @@ +{ + "name": "deepgram-bandwidth-call-transcription", + "version": "1.0.0", + "description": "Transcribe Bandwidth phone calls in real-time using Deepgram live STT", + "main": "src/index.js", + "scripts": { + "start": "node src/index.js", + "test": "node tests/test.js" + }, + "dependencies": { + "@deepgram/sdk": "5.0.0", + "bandwidth-sdk": "^7.4.0", + "dotenv": "^16.4.0", + "express": "^4.21.0", + "express-ws": "^5.0.2", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=18" + } +} diff --git a/examples/380-bandwidth-call-transcription-node/src/index.js b/examples/380-bandwidth-call-transcription-node/src/index.js new file mode 100644 index 0000000..a3df4de --- /dev/null +++ b/examples/380-bandwidth-call-transcription-node/src/index.js @@ -0,0 +1,211 @@ +'use strict'; + +require('dotenv').config(); + +const express = require('express'); +const expressWs = require('express-ws'); +const { DeepgramClient } = require('@deepgram/sdk'); +const { Bxml: BxmlNs } = require('bandwidth-sdk'); +const { Bxml, SpeakSentence, StartStream, StopStream, Pause } = BxmlNs; + +const PORT = process.env.PORT || 3000; + +// Bandwidth streams μ-law (PCMU) audio at 8 kHz mono — the standard telephony +// encoding. This matches Twilio's format but differs from Vonage (linear16 16 kHz). +// nova-3 handles both encodings natively; mulaw is specified here to match +// what Bandwidth actually sends on the wire. 
+const DEEPGRAM_LIVE_OPTIONS = { + model: 'nova-3', + encoding: 'mulaw', + sample_rate: 8000, + channels: 1, + smart_format: true, + interim_results: true, + utterance_end_ms: 1000, + tag: 'deepgram-examples', +}; + +function createApp() { + const app = express(); + expressWs(app); + app.use(express.json()); + + if (!process.env.DEEPGRAM_API_KEY) { + console.error('Error: DEEPGRAM_API_KEY environment variable is not set.'); + console.error('Copy .env.example to .env and add your API key.'); + process.exit(1); + } + + const deepgram = new DeepgramClient({ apiKey: process.env.DEEPGRAM_API_KEY }); + + // Bandwidth calls this endpoint when an inbound call arrives (the "answer URL"). + // We return BXML — Bandwidth's XML call-control language — that plays a greeting + // then opens a WebSocket media stream back to our /stream endpoint. + // + // Key difference from Twilio: Bandwidth uses (not ) + // and sends JSON-wrapped base64 audio with an eventType field rather than an event field. + app.post('/webhooks/answer', (req, res) => { + const host = req.headers.host; + const protocol = req.headers['x-forwarded-proto'] === 'https' ? 'wss' : 'ws'; + const streamUrl = `${protocol}://${host}/stream`; + + const speak = new SpeakSentence('This call is being transcribed by Deepgram.'); + const startStream = new StartStream({ + destination: streamUrl, + name: 'deepgram_stream', + }); + // StopStream with wait keeps the call alive until the stream ends. + // Pause holds the BXML execution so the call doesn't hang up immediately. + const pause = new Pause({ duration: 3600 }); + const stopStream = new StopStream({ name: 'deepgram_stream' }); + const bxml = new Bxml([speak, startStream, pause, stopStream]); + + res.type('application/xml').send(bxml.toBxml()); + console.log(`[answer] New call → streaming to ${streamUrl}`); + }); + + // Bandwidth sends call lifecycle events here (initiated, ringing, answered, completed). + // A 200 response is required even if you don't use the data. 
+ app.post('/webhooks/status', (req, res) => { + const { eventType, callId } = req.body || {}; + if (eventType) { + console.log(`[status] ${callId || 'unknown'}: ${eventType}`); + } + res.sendStatus(200); + }); + + // Bandwidth opens a WebSocket here for each active . + // Messages are JSON with an eventType field: + // "start" — stream metadata (accountId, callId, tracks with encoding/sampleRate) + // "media" — base64-encoded audio in the "payload" field + // "stop" — stream ended (caller hung up or StopStream executed) + // + // Unlike Vonage (raw binary PCM), Bandwidth wraps audio in JSON — similar to + // Twilio but with different field names (eventType vs event, payload vs media.payload). + app.ws('/stream', (bandwidthWs) => { + let dgConnection = null; + let dgReady = false; + let callId = 'unknown'; + const mediaQueue = []; + + console.log('[stream] Bandwidth WebSocket connected'); + + bandwidthWs.on('message', (raw) => { + try { + const message = JSON.parse(raw); + + switch (message.eventType) { + case 'start': + callId = message.metadata?.callId || callId; + console.log(`[bandwidth] Stream started — call: ${callId}`); + if (message.metadata?.tracks) { + const track = message.metadata.tracks[0]; + console.log(`[bandwidth] Audio format: ${track?.mediaFormat?.encoding} @ ${track?.mediaFormat?.sampleRate} Hz`); + } + break; + + case 'media': + if (dgReady && dgConnection) { + try { + dgConnection.sendMedia(Buffer.from(message.payload, 'base64')); + } catch {} + } else { + mediaQueue.push(message.payload); + } + break; + + case 'stop': + console.log(`[bandwidth] Stream stopped — call: ${callId}`); + if (dgConnection) { + try { dgConnection.sendCloseStream({ type: 'CloseStream' }); } catch {} + try { dgConnection.close(); } catch {} + dgConnection = null; + } + break; + + default: + break; + } + } catch (err) { + console.error('[stream] Error handling message:', err.message); + } + }); + + bandwidthWs.on('close', () => { + console.log(`[stream] Bandwidth 
WebSocket closed — call: ${callId}`); + if (dgConnection) { + try { dgConnection.sendCloseStream({ type: 'CloseStream' }); } catch {} + try { dgConnection.close(); } catch {} + dgConnection = null; + } + }); + + bandwidthWs.on('error', (err) => { + console.error('[stream] Bandwidth WebSocket error:', err.message); + if (dgConnection) { + try { dgConnection.close(); } catch {} + dgConnection = null; + } + }); + + (async () => { + dgConnection = await deepgram.listen.v1.connect(DEEPGRAM_LIVE_OPTIONS); + + dgConnection.on('open', () => { + console.log('[deepgram] Connection opened'); + dgReady = true; + for (const payload of mediaQueue) { + try { + dgConnection.sendMedia(Buffer.from(payload, 'base64')); + } catch {} + } + mediaQueue.length = 0; + }); + + dgConnection.on('error', (err) => { + console.error('[deepgram] Error:', err.message); + dgReady = false; + }); + + dgConnection.on('close', () => { + console.log('[deepgram] Connection closed'); + dgReady = false; + }); + + // Deepgram transcript events arrive here. + // data.channel.alternatives[0].transcript contains the text. + // data.is_final distinguishes stable results from interim partials. + dgConnection.on('message', (data) => { + const transcript = data?.channel?.alternatives?.[0]?.transcript; + if (transcript) { + const tag = data.is_final ? 
'final' : 'interim'; + console.log(`[${tag}] ${transcript}`); + } + }); + + dgConnection.connect(); + await dgConnection.waitForOpen(); + })().catch((err) => { + console.error('[deepgram] Setup failed:', err.message); + }); + }); + + app.get('/', (_req, res) => { + res.json({ status: 'ok', service: 'deepgram-bandwidth-call-transcription' }); + }); + + return app; +} + +if (require.main === module) { + const app = createApp(); + app.listen(PORT, () => { + console.log(`Server listening on port ${PORT}`); + console.log(` POST /webhooks/answer — Bandwidth answer webhook (returns BXML)`); + console.log(` POST /webhooks/status — Bandwidth status webhook`); + console.log(` WS /stream — Bandwidth media stream WebSocket`); + console.log(` GET / — Health check`); + }); +} + +module.exports = { createApp }; diff --git a/examples/380-bandwidth-call-transcription-node/tests/test.js b/examples/380-bandwidth-call-transcription-node/tests/test.js new file mode 100644 index 0000000..a608a74 --- /dev/null +++ b/examples/380-bandwidth-call-transcription-node/tests/test.js @@ -0,0 +1,276 @@ +'use strict'; + +const fs = require('fs'); +const path = require('path'); +const http = require('http'); +const { execSync } = require('child_process'); +const WebSocket = require('ws'); + +// ── Credential check ───────────────────────────────────────────────────────── +const envExample = path.join(__dirname, '..', '.env.example'); +const required = fs.readFileSync(envExample, 'utf8') + .split('\n') + .filter(l => /^[A-Z][A-Z0-9_]+=/.test(l.trim())) + .map(l => l.split('=')[0].trim()); + +const missing = required.filter(k => !process.env[k]); +if (missing.length > 0) { + console.error(`MISSING_CREDENTIALS: ${missing.join(',')}`); + process.exit(2); +} +// ───────────────────────────────────────────────────────────────────────────── + +const { createApp } = require('../src/index.js'); + +const PORT = 3099; +const AUDIO_URL = 'https://dpgr.am/spacewalk.wav'; +const TMP_WAV = 
'/tmp/bandwidth_test.wav'; +const CHUNK_SIZE = 320; + +const LINEAR_TO_ULAW = (() => { + const BIAS = 0x84; + const CLIP = 32635; + const table = new Int8Array(65536); + for (let i = -32768; i < 32768; i++) { + let sample = i < 0 ? ~i : i; + if (sample > CLIP) sample = CLIP; + sample += BIAS; + let exponent = 7; + for (let expMask = 0x4000; (sample & expMask) === 0 && exponent > 0; exponent--, expMask >>= 1); + const mantissa = (sample >> (exponent + 3)) & 0x0F; + let ulawByte = ~(((i < 0 ? 0x80 : 0) | (exponent << 4) | mantissa)) & 0xFF; + table[i & 0xFFFF] = ulawByte; + } + return table; +})(); + +function wavToMulaw8k(wavBuffer) { + let offset = 12; + let sampleRate = 0, bitsPerSample = 0, numChannels = 0, dataStart = 0, dataSize = 0; + while (offset < wavBuffer.length - 8) { + const chunkId = wavBuffer.toString('ascii', offset, offset + 4); + const chunkSize = wavBuffer.readUInt32LE(offset + 4); + if (chunkId === 'fmt ') { + numChannels = wavBuffer.readUInt16LE(offset + 10); + sampleRate = wavBuffer.readUInt32LE(offset + 12); + bitsPerSample = wavBuffer.readUInt16LE(offset + 22); + } else if (chunkId === 'data') { + dataStart = offset + 8; + dataSize = chunkSize; + break; + } + offset += 8 + chunkSize; + } + if (!dataStart) throw new Error('Invalid WAV: no data chunk'); + + const bytesPerSample = bitsPerSample / 8; + const totalSamples = Math.floor(dataSize / (bytesPerSample * numChannels)); + const ratio = sampleRate / 8000; + const outLen = Math.floor(totalSamples / ratio); + const out = Buffer.alloc(outLen); + + for (let i = 0; i < outLen; i++) { + const srcIdx = Math.floor(i * ratio); + const byteOff = dataStart + srcIdx * bytesPerSample * numChannels; + let sample; + if (bitsPerSample === 16) { + sample = wavBuffer.readInt16LE(byteOff); + } else if (bitsPerSample === 24) { + sample = (wavBuffer[byteOff] | (wavBuffer[byteOff + 1] << 8) | (wavBuffer[byteOff + 2] << 16)); + if (sample & 0x800000) sample |= ~0xFFFFFF; + sample = sample >> 8; + } else if 
(bitsPerSample === 32) { + sample = wavBuffer.readInt32LE(byteOff) >> 16; + } else { + sample = (wavBuffer[byteOff] - 128) << 8; + } + out[i] = LINEAR_TO_ULAW[sample & 0xFFFF]; + } + return out; +} + +function prepareMulawAudio() { + console.log('Downloading test audio...'); + execSync(`curl -s -L -o "${TMP_WAV}" "${AUDIO_URL}"`, { stdio: 'pipe' }); + + console.log('Converting to mulaw 8 kHz mono...'); + const wavData = fs.readFileSync(TMP_WAV); + const audio = wavToMulaw8k(wavData); + console.log(`Audio ready: ${audio.length} bytes of mulaw 8 kHz`); + return audio; +} + +// ── Test 1: BXML endpoint ─────────────────────────────────────────────────── +// POST /webhooks/answer should return BXML with pointing at /stream. +function testBxmlEndpoint(port) { + return new Promise((resolve, reject) => { + const body = JSON.stringify({ eventType: 'answer', callId: 'test-call-id' }); + const req = http.request( + { + hostname: 'localhost', port, path: '/webhooks/answer', method: 'POST', + headers: { + host: `localhost:${port}`, + 'content-type': 'application/json', + 'content-length': Buffer.byteLength(body), + }, + }, + (res) => { + let data = ''; + res.on('data', c => (data += c)); + res.on('end', () => { + if (res.statusCode !== 200) return reject(new Error(`/webhooks/answer returned ${res.statusCode}`)); + if (!data.includes('StartStream')) + return reject(new Error(`BXML missing element:\n${data}`)); + if (!data.includes(`localhost:${port}/stream`)) + return reject(new Error(`BXML should point to /stream:\n${data}`)); + if (!data.includes('SpeakSentence')) + return reject(new Error(`BXML missing element:\n${data}`)); + console.log('POST /webhooks/answer -> BXML with correct '); + resolve(); + }); + }, + ); + req.on('error', reject); + req.write(body); + req.end(); + }); +} + +// ── Test 2: Full WebSocket + Deepgram pipeline ───────────────────────────── +// Connects to /stream acting as Bandwidth, sends real mulaw audio in Bandwidth's +// exact message format, and 
verifies Deepgram returns transcript text. +function testMediaStreamFlow(port, audioData) { + return new Promise((resolve, reject) => { + const transcripts = []; + + const origLog = console.log; + console.log = (...args) => { + origLog(...args); + const line = args.join(' '); + if (line.startsWith('[final]') || line.startsWith('[interim]')) { + transcripts.push(line); + } + }; + + const cleanup = (fn) => { console.log = origLog; fn(); }; + + let settled = false; + + const timeout = setTimeout(() => { + cleanup(() => reject(new Error( + 'Timed out (30s) waiting for Deepgram transcript.\n' + + 'Check DEEPGRAM_API_KEY and connectivity to api.deepgram.com.', + ))); + }, 30_000); + + const settle = () => { + if (settled) return; + settled = true; + clearTimeout(timeout); + cleanup(() => { + if (transcripts.length === 0) { + reject(new Error( + 'No transcripts received from Deepgram after streaming audio.\n' + + 'This may indicate a Deepgram connection issue or audio encoding problem.', + )); + } else { + resolve(transcripts); + } + }); + }; + + const ws = new WebSocket(`ws://localhost:${port}/stream`); + + ws.on('error', (err) => { + clearTimeout(timeout); + cleanup(() => reject(err)); + }); + + ws.on('open', () => { + // Bandwidth sends a "start" event first with stream metadata + ws.send(JSON.stringify({ + eventType: 'start', + metadata: { + accountId: process.env.BW_ACCOUNT_ID, + callId: 'test-call-id', + streamId: 'stream-ci-test', + streamName: 'deepgram_stream', + tracks: [{ + name: 'inbound', + mediaFormat: { encoding: 'audio/PCMU', sampleRate: 8000 }, + }], + }, + })); + + let offset = 0; + const MAX_BYTES = 8000 * 10; + + const sendChunk = () => { + if (ws.readyState !== WebSocket.OPEN) return; + + if (offset >= audioData.length || offset >= MAX_BYTES) { + ws.send(JSON.stringify({ eventType: 'stop' })); + setTimeout(() => { + try { ws.close(); } catch {} + setTimeout(settle, 2000); + }, 500); + return; + } + + // Bandwidth sends audio as JSON with eventType 
"media" and base64 payload + ws.send(JSON.stringify({ + eventType: 'media', + payload: audioData.subarray(offset, offset + CHUNK_SIZE).toString('base64'), + })); + + offset += CHUNK_SIZE; + setTimeout(sendChunk, 20); + }; + + setTimeout(sendChunk, 500); + }); + + ws.on('close', () => { + setTimeout(settle, 2000); + }); + }); +} + +// ── Main ────────────────────────────────────────────────────────────────────── +async function run() { + const audioData = prepareMulawAudio(); + + const app = createApp(); + const server = app.listen(PORT); + await new Promise(r => server.on('listening', r)); + console.log(`\nServer started on :${PORT}`); + + try { + await testBxmlEndpoint(PORT); + + console.log('\nStreaming audio through server -> Deepgram (up to 30 s)...'); + const transcripts = await testMediaStreamFlow(PORT, audioData); + + console.log(`\nReceived ${transcripts.length} transcript event(s)`); + console.log(` First: ${transcripts[0]}`); + + const combined = transcripts.join(' ').toLowerCase(); + const audioSentSecs = Math.min(audioData.length, 8000 * 10) / 8000; + const minChars = Math.max(5, audioSentSecs * 2); + const totalChars = combined.replace(/\[(final|interim)\]/g, '').trim().length; + + if (totalChars < minChars) { + throw new Error( + `Transcript too short: ${totalChars} chars for ${audioSentSecs}s of audio (expected >= ${minChars})`, + ); + } + console.log(`Transcript length verified: ${totalChars} chars for ${audioSentSecs}s of audio`); + + } finally { + server.close(); + } +} + +run() + .then(() => { console.log('\nAll tests passed'); process.exit(0); }) + .catch(err => { console.error(`\nTest failed: ${err.message}`); process.exit(1); });