From 006e72d8ea4ecf385da102e075c472959e2f2ab0 Mon Sep 17 00:00:00 2001 From: examples-bot Date: Fri, 3 Apr 2026 16:58:49 +0000 Subject: [PATCH] =?UTF-8?q?feat(examples):=20add=20410=20=E2=80=94=20Sinch?= =?UTF-8?q?=20Voice=20API=20call=20transcription=20with=20Deepgram=20STT?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude --- .../.env.example | 6 + .../README.md | 85 +++++ .../package.json | 21 ++ .../src/index.js | 243 +++++++++++++ .../tests/test.js | 330 ++++++++++++++++++ 5 files changed, 685 insertions(+) create mode 100644 examples/410-sinch-voice-transcription-node/.env.example create mode 100644 examples/410-sinch-voice-transcription-node/README.md create mode 100644 examples/410-sinch-voice-transcription-node/package.json create mode 100644 examples/410-sinch-voice-transcription-node/src/index.js create mode 100644 examples/410-sinch-voice-transcription-node/tests/test.js diff --git a/examples/410-sinch-voice-transcription-node/.env.example b/examples/410-sinch-voice-transcription-node/.env.example new file mode 100644 index 0000000..4817100 --- /dev/null +++ b/examples/410-sinch-voice-transcription-node/.env.example @@ -0,0 +1,6 @@ +# Deepgram — https://console.deepgram.com/ +DEEPGRAM_API_KEY= + +# Sinch — https://dashboard.sinch.com/voice/apps +SINCH_APPLICATION_KEY= +SINCH_APPLICATION_SECRET= diff --git a/examples/410-sinch-voice-transcription-node/README.md b/examples/410-sinch-voice-transcription-node/README.md new file mode 100644 index 0000000..e2c4544 --- /dev/null +++ b/examples/410-sinch-voice-transcription-node/README.md @@ -0,0 +1,85 @@ +# Sinch Voice API — Real-Time Call Transcription + +Transcribe live phone calls in real-time by connecting the Sinch Voice API's ConnectStream WebSocket to Deepgram's streaming speech-to-text API. Every word spoken on a call is transcribed within milliseconds and printed to the console. 
+ +## What you'll build + +A Node.js Express server that receives inbound Sinch phone calls, responds with SVAML to route the call audio over a WebSocket, and forwards the raw PCM audio to Deepgram for live transcription. + +## Prerequisites + +- Node.js 18+ +- Deepgram account — [get a free API key](https://console.deepgram.com/) +- Sinch account with Voice capability — [sign up](https://dashboard.sinch.com/) +- A Sinch Voice application with a linked phone number +- A public URL for your server (use [ngrok](https://ngrok.com/) for local development) + +## Environment variables + +| Variable | Where to find it | +|----------|-----------------| +| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) | +| `SINCH_APPLICATION_KEY` | [Sinch dashboard](https://dashboard.sinch.com/voice/apps) → your Voice app | +| `SINCH_APPLICATION_SECRET` | [Sinch dashboard](https://dashboard.sinch.com/voice/apps) → your Voice app | + +Copy `.env.example` to `.env` and fill in your values. + +## Install and run + +```bash +npm install +npm start +``` + +Then expose the server publicly (for local dev): + +```bash +ngrok http 3000 +``` + +Configure your Sinch Voice application's callback URLs, replacing `YOUR_PUBLIC_HOST` with the public hostname of your server (for example the forwarding host that ngrok prints): +- **ICE URL:** `https://YOUR_PUBLIC_HOST/sinch/ice` (HTTP POST) +- **ACE URL:** `https://YOUR_PUBLIC_HOST/sinch/ace` (HTTP POST) +- **DiCE URL:** `https://YOUR_PUBLIC_HOST/sinch/dice` (HTTP POST) + +Call your Sinch number — you'll see live transcripts in the console. 
+ +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `model` | `nova-3` | Deepgram's latest and most accurate STT model | +| `encoding` | `linear16` | 16-bit signed PCM — the format Sinch streams | +| `sample_rate` | `16000` | 16 kHz wideband audio from Sinch ConnectStream | +| `smart_format` | `true` | Auto-formats numbers, dates, currency in transcripts | +| `interim_results` | `true` | Get partial transcripts as the caller speaks | +| `utterance_end_ms` | `1000` | Detect 1 s of silence as end of utterance | + +## How it works + +1. An incoming call triggers Sinch to POST an ICE (Incoming Call Event) to `/sinch/ice` +2. The server responds with SVAML: an `answer` instruction, a `say` greeting, and a `connectStream` action pointing to the `/stream` WebSocket +3. Sinch opens a WebSocket to `/stream` and streams the call audio as raw 16-bit linear PCM at 16 kHz +4. The server forwards each audio frame directly to a Deepgram live transcription WebSocket +5. Deepgram returns interim and final transcript events, which the server logs to the console +6. 
When the call ends, Sinch closes the WebSocket and both connections clean up + +## How this differs from Twilio and Vonage + +| | Twilio Media Streams | Vonage Voice WebSocket | Sinch ConnectStream | +|---|---|---|---| +| Audio format | μ-law, 8 kHz | Linear16, 16 kHz | Linear16, configurable (8–16 kHz) | +| WebSocket payload | Base64 JSON | Raw binary | Raw binary | +| Call control | TwiML (XML) | NCCO (JSON) | SVAML (JSON) | +| Webhook events | Single status callback | Answer + Event URLs | ICE + ACE + DiCE | + +## Related + +- [Deepgram live STT docs](https://developers.deepgram.com/docs/getting-started-with-live-streaming-audio) +- [Sinch Voice API docs](https://developers.sinch.com/docs/voice/) +- [Sinch SVAML reference](https://developers.sinch.com/docs/voice/api-reference/svaml/) +- [Sinch ConnectStream](https://developers.sinch.com/docs/voice/api-reference/voice/tag/Callbacks/) + +## Starter templates + +If you want a ready-to-run base for your own project, check the [deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) org — there are starter repos for every language and every Deepgram product. 
diff --git a/examples/410-sinch-voice-transcription-node/package.json b/examples/410-sinch-voice-transcription-node/package.json new file mode 100644 index 0000000..c4d1fce --- /dev/null +++ b/examples/410-sinch-voice-transcription-node/package.json @@ -0,0 +1,21 @@ +{ + "name": "deepgram-sinch-voice-transcription", + "version": "1.0.0", + "description": "Transcribe Sinch phone calls in real-time using Deepgram live STT", + "main": "src/index.js", + "scripts": { + "start": "node src/index.js", + "test": "node tests/test.js" + }, + "dependencies": { + "@deepgram/sdk": "5.0.0", + "@sinch/sdk-core": "^1.4.0", + "dotenv": "^16.4.0", + "express": "^4.21.0", + "express-ws": "^5.0.2", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=18" + } +} diff --git a/examples/410-sinch-voice-transcription-node/src/index.js b/examples/410-sinch-voice-transcription-node/src/index.js new file mode 100644 index 0000000..7f79303 --- /dev/null +++ b/examples/410-sinch-voice-transcription-node/src/index.js @@ -0,0 +1,243 @@ +'use strict'; + +require('dotenv').config(); + +const express = require('express'); +const expressWs = require('express-ws'); +const { DeepgramClient } = require('@deepgram/sdk'); +const { SinchClient } = require('@sinch/sdk-core'); + +const PORT = process.env.PORT || 3000; + +// Sinch streams 16-bit linear PCM at 8 kHz by default over its +// ConnectStream WebSocket. Setting sampleRate in streamingOptions +// lets you request 16 kHz instead — higher quality means better +// accuracy for names, numbers, and low-energy consonants. +const SINCH_SAMPLE_RATE = 16000; + +const DEEPGRAM_LIVE_OPTIONS = { + model: 'nova-3', + encoding: 'linear16', + sample_rate: SINCH_SAMPLE_RATE, + channels: 1, + smart_format: true, + // interim_results gives fast partial transcripts while the speaker + // is still talking. Set to false if you only need final results. + interim_results: true, + // utterance_end_ms fires an UtteranceEnd event after this much silence. 
+  // 1000 ms is a good default for phone conversations.
+  utterance_end_ms: 1000,
+  tag: 'deepgram-examples',
+};
+
+/**
+ * Build the Express application that bridges Sinch call audio to Deepgram.
+ *
+ * Routes registered:
+ *   POST /sinch/ice   Incoming Call Event; replies with SVAML (answer, say, connectStream).
+ *   POST /sinch/ace   Answered Call Event; replies with a "continue" action.
+ *   POST /sinch/dice  Disconnect Call Event; logs and replies 200.
+ *   WS   /stream      Receives the call audio and forwards it to a Deepgram live connection.
+ *   GET  /            Health check.
+ *
+ * Side effect: terminates the process with exit code 1 when DEEPGRAM_API_KEY,
+ * SINCH_APPLICATION_KEY or SINCH_APPLICATION_SECRET is missing from the environment.
+ *
+ * @returns {import('express').Express} The configured app; the caller is responsible for listen().
+ */
+function createApp() {
+  const app = express();
+  expressWs(app);
+  app.use(express.json());
+
+  // Upper bound on audio frames buffered while Deepgram is not (or no longer)
+  // connected. Without a cap, a Deepgram outage in the middle of a call would
+  // make the queue grow for the rest of the call. Oldest frames are dropped first.
+  const MAX_QUEUED_FRAMES = 500;
+
+  if (!process.env.DEEPGRAM_API_KEY) {
+    console.error('Error: DEEPGRAM_API_KEY environment variable is not set.');
+    console.error('Copy .env.example to .env and add your API key.');
+    process.exit(1);
+  }
+
+  if (!process.env.SINCH_APPLICATION_KEY || !process.env.SINCH_APPLICATION_SECRET) {
+    console.error('Error: SINCH_APPLICATION_KEY and SINCH_APPLICATION_SECRET must be set.');
+    console.error('Copy .env.example to .env and add your Sinch credentials.');
+    process.exit(1);
+  }
+
+  const deepgram = new DeepgramClient({ apiKey: process.env.DEEPGRAM_API_KEY });
+
+  // NOTE(review): the SinchClient is constructed with the application
+  // credentials, but nothing in this file verifies the signature of the
+  // incoming callbacks yet: every /sinch/* route below trusts its caller.
+  // TODO: verify the Sinch callback signature before using this in production.
+  const sinch = new SinchClient({
+    applicationKey: process.env.SINCH_APPLICATION_KEY,
+    applicationSecret: process.env.SINCH_APPLICATION_SECRET,
+  });
+
+  // ICE (Incoming Call Event) — Sinch hits this when a call arrives.
+  // We respond with SVAML containing a "say" instruction (greeting)
+  // followed by a "connectStream" action that tells Sinch to open a
+  // WebSocket to our /stream endpoint and pipe the call audio through it.
+  //
+  // This is Sinch's equivalent of Twilio's TwiML or
+  // Vonage's NCCO "connect" with type "websocket".
+  app.post('/sinch/ice', (req, res) => {
+    const host = req.headers.host;
+    // Proxies may send a comma-separated list ("https,http"); the first entry
+    // is the scheme used by the original client.
+    const forwardedProto = String(req.headers['x-forwarded-proto'] || '').split(',')[0].trim();
+    const protocol = forwardedProto === 'https' ? 'wss' : 'ws';
+    const streamUrl = `${protocol}://${host}/stream`;
+
+    const callId = req.body?.callid || 'unknown';
+    console.log(`[ice] Incoming call ${callId} → streaming to ${streamUrl}`);
+
+    // SVAML response: answer the call, play a greeting, then connect
+    // the audio stream to our WebSocket server for Deepgram transcription.
+    const svaml = {
+      instructions: [
+        { name: 'answer' },
+        {
+          name: 'say',
+          text: 'This call is being transcribed by Deepgram.',
+          locale: 'en-US',
+        },
+      ],
+      action: {
+        name: 'connectStream',
+        destination: {
+          type: 'websocket',
+          endpoint: streamUrl,
+        },
+        // streamingOptions.sampleRate controls the audio quality Sinch
+        // sends. 16000 Hz gives wideband audio — noticeably better than
+        // the default 8000 Hz telephony-grade stream.
+        streamingOptions: {
+          version: 1,
+          sampleRate: SINCH_SAMPLE_RATE,
+        },
+        maxDuration: 3600,
+        callHeaders: [
+          { key: 'call-id', value: callId },
+        ],
+      },
+    };
+
+    res.json(svaml);
+  });
+
+  // ACE (Answered Call Event) — fired when the callee answers.
+  // For a connectStream flow there's nothing special to do here;
+  // just acknowledge with a continue action to keep the call alive.
+  app.post('/sinch/ace', (req, res) => {
+    const callId = req.body?.callid || 'unknown';
+    console.log(`[ace] Call answered: ${callId}`);
+    res.json({ action: { name: 'continue' } });
+  });
+
+  // DiCE (Disconnect Call Event) — fired when the call ends.
+  // No SVAML response needed; just log and acknowledge.
+  app.post('/sinch/dice', (req, res) => {
+    const callId = req.body?.callid || 'unknown';
+    const reason = req.body?.reason || 'unknown';
+    console.log(`[dice] Call disconnected: ${callId} (${reason})`);
+    res.sendStatus(200);
+  });
+
+  // Each phone call opens a separate WebSocket here. Sinch sends an
+  // initial JSON text message with call metadata and custom headers,
+  // followed by binary frames containing raw linear16 PCM audio.
+  // This is similar to Vonage's WebSocket format — raw binary, no
+  // base64 wrapping like Twilio.
+  app.ws('/stream', (sinchWs, req) => {
+    let dgConnection = null;
+    let dgReady = false;
+    let sinchClosed = false;
+    let droppedFrames = 0;
+    let callId = 'unknown';
+    const mediaQueue = [];
+
+    console.log('[stream] Sinch WebSocket connected');
+
+    // Best-effort teardown of the Deepgram side. Safe to call repeatedly:
+    // the connection reference is cleared before anything that can throw.
+    const closeDeepgram = (graceful) => {
+      dgReady = false;
+      mediaQueue.length = 0;
+      const conn = dgConnection;
+      dgConnection = null;
+      if (!conn) return;
+      if (graceful) {
+        try { conn.sendCloseStream({ type: 'CloseStream' }); } catch {}
+      }
+      try { conn.close(); } catch {}
+    };
+
+    sinchWs.on('message', (raw, isBinary) => {
+      // Sinch sends two types of messages over the ConnectStream WebSocket:
+      //   1. An initial JSON text message with metadata (call headers,
+      //      content-type, custom headers from the SVAML response).
+      //   2. Binary messages containing raw PCM audio frames.
+      // Depending on the ws version in use, a text frame arrives either as a
+      // string or as a Buffer accompanied by isBinary === false; accept both
+      // so metadata is never forwarded to Deepgram as if it were audio.
+      const isText = typeof raw === 'string' || isBinary === false;
+      if (isText) {
+        const text = raw.toString();
+        try {
+          const metadata = JSON.parse(text);
+          if (Array.isArray(metadata?.callHeaders)) {
+            const header = metadata.callHeaders.find(h => h.key === 'call-id');
+            if (header) callId = header.value;
+          }
+          console.log(`[sinch] Stream metadata — call: ${callId}`);
+        } catch {
+          console.warn('[sinch] Unexpected text message:', text.substring(0, 100));
+        }
+        return;
+      }
+
+      // Binary frame — raw linear16 PCM audio. Forward directly to Deepgram.
+      if (dgReady && dgConnection) {
+        try {
+          dgConnection.sendMedia(raw);
+        } catch (err) {
+          console.warn('[deepgram] Failed to forward audio frame:', err.message);
+        }
+      } else if (raw.length > 0) {
+        if (mediaQueue.length >= MAX_QUEUED_FRAMES) {
+          mediaQueue.shift();
+          // Log only when dropping starts, not once per frame.
+          if (droppedFrames === 0) {
+            console.warn(`[stream] Deepgram not ready; dropping oldest buffered audio (call: ${callId})`);
+          }
+          droppedFrames += 1;
+        }
+        mediaQueue.push(raw);
+      }
+    });
+
+    sinchWs.on('close', () => {
+      console.log(`[stream] Sinch WebSocket closed (call: ${callId})`);
+      sinchClosed = true;
+      closeDeepgram(true);
+    });
+
+    sinchWs.on('error', (err) => {
+      console.error('[stream] Sinch WebSocket error:', err.message);
+      closeDeepgram(false);
+    });
+
+    (async () => {
+      const conn = await deepgram.listen.v1.connect(DEEPGRAM_LIVE_OPTIONS);
+
+      // The caller may have hung up while the connection object was being
+      // created. The close handler saw dgConnection === null at that point,
+      // so release the new connection here instead of leaking it.
+      if (sinchClosed) {
+        try { conn.close(); } catch {}
+        return;
+      }
+      dgConnection = conn;
+
+      conn.on('open', () => {
+        console.log('[deepgram] Connection opened');
+        dgReady = true;
+        for (const buf of mediaQueue) {
+          try {
+            conn.sendMedia(buf);
+          } catch (err) {
+            console.warn('[deepgram] Failed to flush buffered audio frame:', err.message);
+          }
+        }
+        mediaQueue.length = 0;
+      });
+
+      conn.on('error', (err) => {
+        console.error('[deepgram] Error:', err.message);
+        dgReady = false;
+      });
+
+      conn.on('close', () => {
+        console.log('[deepgram] Connection closed');
+        dgReady = false;
+      });
+
+      // Deepgram sends transcript events here. is_final distinguishes
+      // partial (interim) results from stable ones. In production you'd
+      // forward these to a UI, database, or analytics pipeline.
+      conn.on('message', (data) => {
+        const transcript = data?.channel?.alternatives?.[0]?.transcript;
+        if (transcript) {
+          const tag = data.is_final ? 'final' : 'interim';
+          console.log(`[${tag}] ${transcript}`);
+        }
+      });
+
+      conn.connect();
+      await conn.waitForOpen();
+    })().catch((err) => {
+      console.error('[deepgram] Setup failed:', err.message);
+      // Release whatever was created; the call itself is left connected and
+      // further audio is bounded by MAX_QUEUED_FRAMES.
+      closeDeepgram(false);
+    });
+  });
+
+  app.get('/', (_req, res) => {
+    res.json({ status: 'ok', service: 'deepgram-sinch-voice-transcription' });
+  });
+
+  return app;
+}
+
+if (require.main === module) {
+  const app = createApp();
+  app.listen(PORT, () => {
+    console.log(`Server listening on port ${PORT}`);
+    console.log(` POST /sinch/ice — Sinch ICE webhook (returns SVAML)`);
+    console.log(` POST /sinch/ace — Sinch ACE webhook`);
+    console.log(` POST /sinch/dice — Sinch DiCE webhook`);
+    console.log(` WS /stream — Sinch audio WebSocket`);
+    console.log(` GET / — Health check`);
+  });
+}
+
+module.exports = { createApp };
diff --git a/examples/410-sinch-voice-transcription-node/tests/test.js b/examples/410-sinch-voice-transcription-node/tests/test.js
new file mode 100644
index 0000000..72a5707
--- /dev/null
+++ b/examples/410-sinch-voice-transcription-node/tests/test.js
@@ -0,0 +1,330 @@
+'use strict';
+
+const fs = require('fs');
+const path = require('path');
+const http = require('http');
+const { execSync } = require('child_process');
+const WebSocket = require('ws');
+
+// ── Credential check ─────────────────────────────────────────────────────────
+// Exit code convention:
+//   0 = all tests passed
+//   1 = real test failure
+//   2 = missing credentials (expected in CI until secrets are configured)
+const envExample
= path.join(__dirname, '..', '.env.example');
+const required = fs.readFileSync(envExample, 'utf8')
+  .split('\n')
+  .filter(l => /^[A-Z][A-Z0-9_]+=/.test(l.trim()))
+  .map(l => l.split('=')[0].trim());
+
+const missing = required.filter(k => !process.env[k]);
+if (missing.length > 0) {
+  console.error(`MISSING_CREDENTIALS: ${missing.join(',')}`);
+  process.exit(2);
+}
+// ─────────────────────────────────────────────────────────────────────────────
+
+const { createApp } = require('../src/index.js');
+
+const PORT = 3098;
+const AUDIO_URL = 'https://dpgr.am/spacewalk.wav';
+const TMP_WAV = '/tmp/sinch_test.wav';
+const CHUNK_SIZE = 640;
+
+/**
+ * Convert a RIFF/WAVE file held in memory to raw 16-bit little-endian mono PCM.
+ *
+ * Only the first channel of each frame is used. Resampling is nearest-sample
+ * decimation (no filtering), which is adequate for a smoke test but not for
+ * production audio.
+ *
+ * @param {Buffer} wavBuffer  Complete contents of a .wav file.
+ * @param {number} targetRate Output sample rate in Hz; must be positive.
+ * @returns {Buffer} Headerless linear16 samples at targetRate.
+ * @throws {Error} If the buffer is not a RIFF/WAVE file, has no usable
+ *   fmt/data chunks, or uses a sample format this helper cannot decode.
+ */
+function wavToLinear16(wavBuffer, targetRate) {
+  if (!(targetRate > 0)) {
+    throw new Error(`Invalid target sample rate: ${targetRate}`);
+  }
+  // Fail early with a clear message when the download produced something
+  // that is not a WAV file (for example an HTML error page).
+  if (
+    wavBuffer.length < 12 ||
+    wavBuffer.toString('ascii', 0, 4) !== 'RIFF' ||
+    wavBuffer.toString('ascii', 8, 12) !== 'WAVE'
+  ) {
+    throw new Error('Invalid WAV: missing RIFF/WAVE header');
+  }
+
+  let offset = 12;
+  let audioFormat = 0;
+  let sampleRate = 0;
+  let bitsPerSample = 0;
+  let numChannels = 0;
+  let dataStart = 0;
+  let dataSize = 0;
+
+  while (offset + 8 <= wavBuffer.length) {
+    const chunkId = wavBuffer.toString('ascii', offset, offset + 4);
+    const chunkSize = wavBuffer.readUInt32LE(offset + 4);
+    if (chunkId === 'fmt ') {
+      if (offset + 24 > wavBuffer.length) {
+        throw new Error('Invalid WAV: truncated fmt chunk');
+      }
+      audioFormat = wavBuffer.readUInt16LE(offset + 8);
+      numChannels = wavBuffer.readUInt16LE(offset + 10);
+      sampleRate = wavBuffer.readUInt32LE(offset + 12);
+      bitsPerSample = wavBuffer.readUInt16LE(offset + 22);
+    } else if (chunkId === 'data') {
+      dataStart = offset + 8;
+      // Never trust the declared size beyond what is actually in the buffer
+      // (truncated downloads, streaming encoders that write a placeholder).
+      dataSize = Math.min(chunkSize, wavBuffer.length - dataStart);
+      break;
+    }
+    // RIFF chunks are word-aligned: an odd-sized chunk is followed by one
+    // pad byte that is not counted in chunkSize.
+    offset += 8 + chunkSize + (chunkSize % 2);
+  }
+  if (!dataStart) throw new Error('Invalid WAV: no data chunk');
+  if (!sampleRate || !numChannels) {
+    throw new Error('Invalid WAV: no fmt chunk before data');
+  }
+  // 1 = integer PCM, 0xFFFE = WAVE_FORMAT_EXTENSIBLE (assumed to wrap integer
+  // PCM here — TODO confirm via the sub-format GUID if that ever matters).
+  if (audioFormat !== 1 && audioFormat !== 0xFFFE) {
+    throw new Error(`Unsupported WAV format tag: ${audioFormat}`);
+  }
+  if (![8, 16, 24, 32].includes(bitsPerSample)) {
+    throw new Error(`Unsupported WAV bit depth: ${bitsPerSample}`);
+  }
+
+  const bytesPerSample = bitsPerSample / 8;
+  const frameSize = bytesPerSample * numChannels;
+  const totalSamples = Math.floor(dataSize / frameSize);
+  const ratio = sampleRate / targetRate;
+  const outLen = Math.floor(totalSamples / ratio);
+  const out = Buffer.alloc(outLen * 2);
+
+  for (let i = 0; i < outLen; i++) {
+    const srcIdx = Math.floor(i * ratio);
+    const byteOff = dataStart + srcIdx * frameSize;
+    let sample;
+    if (bitsPerSample === 16) {
+      sample = wavBuffer.readInt16LE(byteOff);
+    } else if (bitsPerSample === 24) {
+      // Keep the 16 most significant bits of the signed 24-bit value.
+      sample = wavBuffer.readIntLE(byteOff, 3) >> 8;
+    } else if (bitsPerSample === 32) {
+      sample = wavBuffer.readInt32LE(byteOff) >> 16;
+    } else {
+      // 8-bit WAV samples are unsigned with a midpoint of 128.
+      sample = (wavBuffer[byteOff] - 128) << 8;
+    }
+    out.writeInt16LE(Math.max(-32768, Math.min(32767, sample)), i * 2);
+  }
+  return out;
+}
+
+function prepareAudio() {
+  console.log('Downloading test audio...');
+  execSync(`curl -s -L -o "${TMP_WAV}" "${AUDIO_URL}"`, { stdio: 'pipe' });
+
+  console.log('Converting to linear16 16 kHz mono...');
+  const wavData = fs.readFileSync(TMP_WAV);
+  const audio = wavToLinear16(wavData, 16000);
+  console.log(`Audio ready: ${audio.length} bytes of linear16 16 kHz`);
+  return audio;
+}
+
+// ── Test 1: ICE endpoint returns valid SVAML ─────────────────────────────────
+function testIceEndpoint(port) {
+  return new Promise((resolve, reject) => {
+    const body = JSON.stringify({ event: 'ice', callid: 'test-call-123' });
+    const req = http.request(
+      {
+        hostname: 'localhost', port, path: '/sinch/ice', method: 'POST',
+        headers: {
+          host: `localhost:${port}`,
+          'content-type': 'application/json',
+          'content-length': Buffer.byteLength(body),
+        },
+      },
+      (res) => {
+        let data = '';
+        res.on('data', c => (data += c));
+        res.on('end', () => {
+          if (res.statusCode !== 200) return reject(new Error(`/sinch/ice returned ${res.statusCode}`));
+          let svaml;
+          try { svaml = JSON.parse(data); } catch (e) { return reject(e); }
+
+          if (!svaml.instructions || !Array.isArray(svaml.instructions)) {
+            return reject(new Error('SVAML must include instructions array'));
+          }
+
+          const answerInstr = svaml.instructions.find(i => i.name === 'answer');
+          if (!answerInstr) {
+            return reject(new Error('SVAML instructions must include "answer"'));
+          }
+
+          const sayInstr = svaml.instructions.find(i => i.name === 'say');
+          if (!sayInstr || !sayInstr.text) {
+            return reject(new Error('SVAML instructions must include "say" with text'));
+          }
+
+          if (!svaml.action || svaml.action.name !== 'connectStream') {
+            return
reject(new Error('SVAML action must be "connectStream"'));
+          }
+
+          if (!svaml.action.destination || svaml.action.destination.type !== 'websocket') {
+            return reject(new Error('connectStream destination must be websocket type'));
+          }
+
+          if (!svaml.action.destination.endpoint || !svaml.action.destination.endpoint.includes('/stream')) {
+            return reject(new Error(`connectStream endpoint must point to /stream, got: ${svaml.action.destination.endpoint}`));
+          }
+
+          // Must match the 16000 Hz the test audio is converted to.
+          if (!svaml.action.streamingOptions || svaml.action.streamingOptions.sampleRate !== 16000) {
+            return reject(new Error('streamingOptions must set sampleRate to 16000'));
+          }
+
+          console.log('POST /sinch/ice → valid SVAML with connectStream');
+          console.log(` destination: ${svaml.action.destination.endpoint}`);
+          console.log(` sampleRate: ${svaml.action.streamingOptions.sampleRate}`);
+          resolve(svaml);
+        });
+      },
+    );
+    req.on('error', reject);
+    req.end(body);
+  });
+}
+
+// ── Test 2: ACE endpoint ─────────────────────────────────────────────────────
+/**
+ * POST a minimal ACE payload to /sinch/ace and check the reply.
+ *
+ * @param {number} port Port of the locally running server under test.
+ * @returns {Promise<void>} Resolves when the response is HTTP 200 with JSON
+ *   whose action.name is "continue"; rejects with an Error otherwise
+ *   (bad status, unparsable JSON, wrong action, or a request error).
+ */
+function testAceEndpoint(port) {
+  return new Promise((resolve, reject) => {
+    const body = JSON.stringify({ event: 'ace', callid: 'test-call-123' });
+    const req = http.request(
+      {
+        hostname: 'localhost', port, path: '/sinch/ace', method: 'POST',
+        headers: {
+          host: `localhost:${port}`,
+          'content-type': 'application/json',
+          'content-length': Buffer.byteLength(body),
+        },
+      },
+      (res) => {
+        // Accumulate the whole body before parsing; chunks may split the JSON.
+        let data = '';
+        res.on('data', c => (data += c));
+        res.on('end', () => {
+          if (res.statusCode !== 200) return reject(new Error(`/sinch/ace returned ${res.statusCode}`));
+          let resp;
+          try { resp = JSON.parse(data); } catch (e) { return reject(e); }
+          if (!resp.action || resp.action.name !== 'continue') {
+            return reject(new Error('ACE response must have action "continue"'));
+          }
+          console.log('POST /sinch/ace → valid response with continue action');
+          resolve();
+        });
+      },
+    );
+    req.on('error', reject);
+    req.end(body);
+  });
+}
+
+// ── Test 3: Health check
─────────────────────────────────────────────────────
+/**
+ * GET / on the local server and check that it reports {status: "ok"}.
+ *
+ * @param {number} port Port of the locally running server under test.
+ * @returns {Promise<void>} Rejects on a non-200 status, unparsable JSON,
+ *   an unexpected status field, or a request error.
+ */
+function testHealthCheck(port) {
+  return new Promise((resolve, reject) => {
+    http.get(`http://127.0.0.1:${port}/`, (res) => {
+      let body = '';
+      res.on('data', c => (body += c));
+      res.on('end', () => {
+        if (res.statusCode !== 200) return reject(new Error(`Health check returned ${res.statusCode}`));
+        let data;
+        try { data = JSON.parse(body); } catch (e) { return reject(e); }
+        if (data.status !== 'ok') {
+          return reject(new Error(`Health check must return {status: "ok"}, got: ${JSON.stringify(data)}`));
+        }
+        console.log('GET / → {status: "ok"}');
+        resolve();
+      });
+    }).on('error', reject);
+  });
+}
+
+// ── Test 4: Full WebSocket + Deepgram pipeline ──────────────────────────────
+// Connects to /stream acting as Sinch, sends real linear16 audio in Sinch's
+// format, and verifies Deepgram returns transcript text.
+/**
+ * @param {number} port      Port of the locally running server under test.
+ * @param {Buffer} audioData Headerless linear16 16 kHz mono audio to stream.
+ * @returns {Promise<string[]>} The "[final] ..." / "[interim] ..." lines the
+ *   server logged while the audio was streamed. Rejects when none were
+ *   logged, on a socket error, or after 30 s.
+ */
+function testStreamFlow(port, audioData) {
+  return new Promise((resolve, reject) => {
+    const transcripts = [];
+
+    // The server only reports transcripts through console.log, so the test
+    // taps console.log for the duration of the stream.
+    const origLog = console.log;
+    console.log = (...args) => {
+      origLog(...args);
+      const line = args.join(' ');
+      if (line.startsWith('[final]') || line.startsWith('[interim]')) {
+        transcripts.push(line);
+      }
+    };
+
+    let settled = false;
+    let ws = null;
+    let timeout = null;
+
+    // Single exit point for success, timeout and socket errors: restores
+    // console.log exactly once, cancels the timer, closes the socket if it
+    // is still open, and settles the promise at most once.
+    const finish = (err) => {
+      if (settled) return;
+      settled = true;
+      clearTimeout(timeout);
+      console.log = origLog;
+      if (ws && ws.readyState !== WebSocket.CLOSED) {
+        try { ws.close(); } catch {}
+      }
+      if (err) {
+        reject(err);
+      } else if (transcripts.length === 0) {
+        reject(new Error(
+          'No transcripts received from Deepgram after streaming audio.\n' +
+          'This may indicate a Deepgram connection issue or audio encoding problem.',
+        ));
+      } else {
+        resolve(transcripts);
+      }
+    };
+    const settle = () => finish(null);
+
+    timeout = setTimeout(() => {
+      finish(new Error(
+        'Timed out (30s) waiting for Deepgram transcript.\n' +
+        'Check DEEPGRAM_API_KEY and connectivity to api.deepgram.com.',
+      ));
+    }, 30_000);
+
+    ws = new WebSocket(`ws://localhost:${port}/stream`);
+
+    ws.on('error', (err) => {
+      finish(err);
+    });
+
+    ws.on('open', () => {
+      // Sinch sends an initial JSON text message with metadata and call headers,
+      // then binary audio frames.
+      ws.send(JSON.stringify({
+        event: 'stream-start',
+        callHeaders: [{ key: 'call-id', value: 'ci-test-call' }],
+        contentType: 'audio/l16;rate=16000',
+      }));
+
+      let offset = 0;
+      const MAX_BYTES = 16000 * 2 * 10; // 10 seconds of 16 kHz 16-bit mono
+
+      // 640 bytes every 20 ms is 16 kHz 16-bit mono in real time.
+      const sendChunk = () => {
+        if (ws.readyState !== WebSocket.OPEN) return;
+
+        if (offset >= audioData.length || offset >= MAX_BYTES) {
+          setTimeout(() => {
+            try { ws.close(); } catch {}
+            setTimeout(settle, 2000);
+          }, 500);
+          return;
+        }
+
+        ws.send(audioData.subarray(offset, offset + CHUNK_SIZE));
+        offset += CHUNK_SIZE;
+        setTimeout(sendChunk, 20);
+      };
+
+      setTimeout(sendChunk, 500);
+    });
+
+    // Give late transcript log lines a moment to arrive after the close.
+    ws.on('close', () => {
+      setTimeout(settle, 2000);
+    });
+  });
+}
+
+// ── Main ────────────────────────────────────────────────────────────────────
+/**
+ * Download the sample audio, start the app on PORT, run the HTTP checks and
+ * the streaming check in order, and always stop the server afterwards.
+ *
+ * @returns {Promise<void>} Rejects with the first failing check's Error.
+ */
+async function run() {
+  const audioData = prepareAudio();
+
+  const app = createApp();
+  const server = app.listen(PORT);
+  await new Promise(r => server.on('listening', r));
+  console.log(`\nServer started on :${PORT}`);
+
+  try {
+    await testIceEndpoint(PORT);
+    await testAceEndpoint(PORT);
+    await testHealthCheck(PORT);
+
+    console.log('\nStreaming audio through server → Deepgram (up to 30 s)...');
+    const transcripts = await testStreamFlow(PORT, audioData);
+
+    console.log(`\nReceived ${transcripts.length} transcript event(s)`);
+    console.log(` First: ${transcripts[0]}`);
+
+    // Sanity floor only: at least ~2 characters of logged text per second of
+    // audio sent (interim and final lines combined), never fewer than 5.
+    const combined = transcripts.join(' ').toLowerCase();
+    const bytesSent = Math.min(audioData.length, 16000 * 2 * 10);
+    const audioSentSecs = bytesSent / (16000 * 2);
+    const minChars = Math.max(5, audioSentSecs * 2);
+    if (combined.length < minChars) {
+      throw new Error(
+        `Transcript too short: ${combined.length} chars for ${audioSentSecs}s of audio (expected >= ${minChars})`,
+      );
+    }
+    console.log(`Transcript length verified: ${combined.length} chars for ${audioSentSecs.toFixed(1)}s audio`);
+
+  } finally {
+    server.close();
+  }
+}
+
+run()
+  .then(() => { console.log('\nAll tests passed'); process.exit(0); })
+  .catch(err => { console.error(`\nTest failed: ${err.message}`); process.exit(1); });