6 changes: 6 additions & 0 deletions examples/410-sinch-voice-transcription-node/.env.example
@@ -0,0 +1,6 @@
# Deepgram — https://console.deepgram.com/
DEEPGRAM_API_KEY=

# Sinch — https://dashboard.sinch.com/voice/apps
SINCH_APPLICATION_KEY=
SINCH_APPLICATION_SECRET=
85 changes: 85 additions & 0 deletions examples/410-sinch-voice-transcription-node/README.md
@@ -0,0 +1,85 @@
# Sinch Voice API — Real-Time Call Transcription

Transcribe live phone calls in real time by connecting the Sinch Voice API's ConnectStream WebSocket to Deepgram's streaming speech-to-text API. Words spoken on the call are transcribed with sub-second latency and printed to the console.

## What you'll build

A Node.js Express server that receives inbound Sinch phone calls, responds with SVAML to route the call audio over a WebSocket, and forwards the raw PCM audio to Deepgram for live transcription.

## Prerequisites

- Node.js 18+
- Deepgram account — [get a free API key](https://console.deepgram.com/)
- Sinch account with Voice capability — [sign up](https://dashboard.sinch.com/)
- A Sinch Voice application with a linked phone number
- A public URL for your server (use [ngrok](https://ngrok.com/) for local development)

## Environment variables

| Variable | Where to find it |
|----------|-----------------|
| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) |
| `SINCH_APPLICATION_KEY` | [Sinch dashboard](https://dashboard.sinch.com/voice/apps) → your Voice app |
| `SINCH_APPLICATION_SECRET` | [Sinch dashboard](https://dashboard.sinch.com/voice/apps) → your Voice app |

Copy `.env.example` to `.env` and fill in your values.

## Install and run

```bash
npm install
npm start
```

Then expose the server publicly (for local dev):

```bash
ngrok http 3000
```

Configure your Sinch Voice application's callback URLs:
- **ICE URL:** `https://<your-ngrok-url>/sinch/ice` (HTTP POST)
- **ACE URL:** `https://<your-ngrok-url>/sinch/ace` (HTTP POST)
- **DiCE URL:** `https://<your-ngrok-url>/sinch/dice` (HTTP POST)

Call your Sinch number — you'll see live transcripts in the console.

## Key parameters

| Parameter | Value | Description |
|-----------|-------|-------------|
| `model` | `nova-3` | Deepgram's latest and most accurate STT model |
| `encoding` | `linear16` | 16-bit signed PCM — the format Sinch streams |
| `sample_rate` | `16000` | 16 kHz wideband audio from Sinch ConnectStream |
| `smart_format` | `true` | Auto-formats numbers, dates, currency in transcripts |
| `interim_results` | `true` | Get partial transcripts as the caller speaks |
| `utterance_end_ms` | `1000` | Detect 1 s of silence as end of utterance |
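
These parameters map one-to-one onto the options object passed to the Deepgram SDK's live-transcription client (the full version lives in `src/index.js`); a standalone sketch:

```javascript
// Live-transcription options mirroring the table above. sample_rate must
// match the streamingOptions.sampleRate requested in the SVAML response,
// or Deepgram will decode the audio at the wrong speed.
const liveOptions = {
  model: 'nova-3',
  encoding: 'linear16',   // 16-bit signed PCM, as Sinch streams it
  sample_rate: 16000,     // 16 kHz wideband audio
  channels: 1,            // phone calls are mono
  smart_format: true,     // format numbers, dates, currency
  interim_results: true,  // partial transcripts while the caller speaks
  utterance_end_ms: 1000, // UtteranceEnd event after 1 s of silence
};
```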

## How it works

1. An incoming call triggers Sinch to POST an ICE (Incoming Call Event) to `/sinch/ice`
2. The server responds with SVAML: an `answer` instruction, a `say` greeting, and a `connectStream` action pointing to the `/stream` WebSocket
3. Sinch opens a WebSocket to `/stream` and streams the call audio as raw 16-bit linear PCM at 16 kHz
4. The server forwards each audio frame directly to a Deepgram live transcription WebSocket
5. Deepgram returns interim and final transcript events, which the server logs to the console
6. When the call ends, Sinch closes the WebSocket and both connections clean up
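
Steps 3–4 hinge on telling the initial metadata message apart from the audio frames. A minimal sketch of that branch (`classifyStreamMessage` is our illustrative name, not a Sinch API; note that with the `ws` library v8, text frames also arrive as Buffers, so the `isBinary` flag is the reliable discriminator):

```javascript
// Classify one ConnectStream WebSocket message: the first frame is JSON
// text metadata; every later frame is raw linear16 PCM to forward.
function classifyStreamMessage(raw, isBinary) {
  if (!isBinary) {
    // Metadata frame — parse the JSON text (call headers, content-type).
    return { type: 'metadata', data: JSON.parse(raw.toString()) };
  }
  // Audio frame — pass the PCM bytes through untouched.
  return { type: 'audio', data: raw };
}
```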

## How this differs from Twilio and Vonage

| | Twilio Media Streams | Vonage Voice WebSocket | Sinch ConnectStream |
|---|---|---|---|
| Audio format | μ-law, 8 kHz | Linear16, 16 kHz | Linear16, configurable (8–16 kHz) |
| WebSocket payload | Base64 JSON | Raw binary | Raw binary |
| Call control | TwiML (XML) | NCCO (JSON) | SVAML (JSON) |
| Webhook events | Single status callback | Answer + Event URLs | ICE + ACE + DiCE |
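
The payload row is the main porting concern when moving between providers. A hedged sketch (the helper name is ours) of normalizing one frame from each provider's WebSocket into an audio Buffer:

```javascript
// Extract the audio bytes from one WebSocket message, per provider.
// Twilio wraps base64 μ-law audio in a JSON "media" event; Vonage and
// Sinch send the PCM bytes as raw binary frames with no wrapping.
function extractAudioFrame(provider, message) {
  if (provider === 'twilio') {
    const event = JSON.parse(message.toString());
    if (event.event !== 'media') return null; // skip start/stop/mark events
    return Buffer.from(event.media.payload, 'base64'); // μ-law 8 kHz bytes
  }
  // Vonage and Sinch: the frame already is raw linear16 PCM.
  return Buffer.isBuffer(message) ? message : Buffer.from(message);
}
```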

## Related

- [Deepgram live STT docs](https://developers.deepgram.com/docs/getting-started-with-live-streaming-audio)
- [Sinch Voice API docs](https://developers.sinch.com/docs/voice/)
- [Sinch SVAML reference](https://developers.sinch.com/docs/voice/api-reference/svaml/)
- [Sinch ConnectStream](https://developers.sinch.com/docs/voice/api-reference/voice/tag/Callbacks/)

## Starter templates

If you want a ready-to-run base for your own project, check the [deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) org — there are starter repos for every language and every Deepgram product.
21 changes: 21 additions & 0 deletions examples/410-sinch-voice-transcription-node/package.json
@@ -0,0 +1,21 @@
{
"name": "deepgram-sinch-voice-transcription",
"version": "1.0.0",
"description": "Transcribe Sinch phone calls in real time using Deepgram live STT",
"main": "src/index.js",
"scripts": {
"start": "node src/index.js",
"test": "node tests/test.js"
},
"dependencies": {
"@deepgram/sdk": "^3.9.0",
"@sinch/sdk-core": "^1.4.0",
"dotenv": "^16.4.0",
"express": "^4.21.0",
"express-ws": "^5.0.2",
"ws": "^8.18.0"
},
"engines": {
"node": ">=18"
}
}
243 changes: 243 additions & 0 deletions examples/410-sinch-voice-transcription-node/src/index.js
@@ -0,0 +1,243 @@
'use strict';

require('dotenv').config();

const express = require('express');
const expressWs = require('express-ws');
const { createClient, LiveTranscriptionEvents } = require('@deepgram/sdk');
const { SinchClient } = require('@sinch/sdk-core');

const PORT = process.env.PORT || 3000;

// Sinch streams 16-bit linear PCM at 8 kHz by default over its
// ConnectStream WebSocket. Setting sampleRate in streamingOptions
// lets you request 16 kHz instead — higher quality means better
// accuracy for names, numbers, and low-energy consonants.
const SINCH_SAMPLE_RATE = 16000;

const DEEPGRAM_LIVE_OPTIONS = {
model: 'nova-3',
encoding: 'linear16',
sample_rate: SINCH_SAMPLE_RATE,
channels: 1,
smart_format: true,
// interim_results gives fast partial transcripts while the speaker
// is still talking. Set to false if you only need final results.
interim_results: true,
// utterance_end_ms fires an UtteranceEnd event after this much silence.
// 1000 ms is a good default for phone conversations.
utterance_end_ms: 1000,
tag: 'deepgram-examples',
};

function createApp() {
const app = express();
expressWs(app);
app.use(express.json());

if (!process.env.DEEPGRAM_API_KEY) {
console.error('Error: DEEPGRAM_API_KEY environment variable is not set.');
console.error('Copy .env.example to .env and add your API key.');
process.exit(1);
}

if (!process.env.SINCH_APPLICATION_KEY || !process.env.SINCH_APPLICATION_SECRET) {
console.error('Error: SINCH_APPLICATION_KEY and SINCH_APPLICATION_SECRET must be set.');
console.error('Copy .env.example to .env and add your Sinch credentials.');
process.exit(1);
}

const deepgram = createClient(process.env.DEEPGRAM_API_KEY);

// The SinchClient holds the application credentials. Sinch signs every
// callback with an HMAC signature; in production, verify that signature
// before trusting a request. Verification is omitted here for brevity.
const sinch = new SinchClient({
applicationKey: process.env.SINCH_APPLICATION_KEY,
applicationSecret: process.env.SINCH_APPLICATION_SECRET,
});

// ICE (Incoming Call Event) — Sinch hits this when a call arrives.
// We respond with SVAML containing a "say" instruction (greeting)
// followed by a "connectStream" action that tells Sinch to open a
// WebSocket to our /stream endpoint and pipe the call audio through it.
//
// This is Sinch's equivalent of Twilio's TwiML <Connect><Stream> or
// Vonage's NCCO "connect" with type "websocket".
app.post('/sinch/ice', (req, res) => {
const host = req.headers.host;
const protocol = req.headers['x-forwarded-proto'] === 'https' ? 'wss' : 'ws';
const streamUrl = `${protocol}://${host}/stream`;

const callId = req.body?.callid || 'unknown';
console.log(`[ice] Incoming call ${callId} → streaming to ${streamUrl}`);

// SVAML response: answer the call, play a greeting, then connect
// the audio stream to our WebSocket server for Deepgram transcription.
const svaml = {
instructions: [
{ name: 'answer' },
{
name: 'say',
text: 'This call is being transcribed by Deepgram.',
locale: 'en-US',
},
],
action: {
name: 'connectStream',
destination: {
type: 'websocket',
endpoint: streamUrl,
},
// streamingOptions.sampleRate controls the audio quality Sinch
// sends. 16000 Hz gives wideband audio — noticeably better than
// the default 8000 Hz telephony-grade stream.
streamingOptions: {
version: 1,
sampleRate: SINCH_SAMPLE_RATE,
},
maxDuration: 3600,
callHeaders: [
{ key: 'call-id', value: callId },
],
},
};

res.json(svaml);
});

// ACE (Answered Call Event) — fired when the callee answers.
// For a connectStream flow there's nothing special to do here;
// just acknowledge with a continue action to keep the call alive.
app.post('/sinch/ace', (req, res) => {
const callId = req.body?.callid || 'unknown';
console.log(`[ace] Call answered: ${callId}`);
res.json({ action: { name: 'continue' } });
});

// DiCE (Disconnect Call Event) — fired when the call ends.
// No SVAML response needed; just log and acknowledge.
app.post('/sinch/dice', (req, res) => {
const callId = req.body?.callid || 'unknown';
const reason = req.body?.reason || 'unknown';
console.log(`[dice] Call disconnected: ${callId} (${reason})`);
res.sendStatus(200);
});

// Each phone call opens a separate WebSocket here. Sinch sends an
// initial JSON text message with call metadata and custom headers,
// followed by binary frames containing raw linear16 PCM audio.
// This is similar to Vonage's WebSocket format — raw binary, no
// base64 wrapping like Twilio.
app.ws('/stream', (sinchWs, req) => {
let dgConnection = null;
let dgReady = false;
let callId = 'unknown';
const mediaQueue = [];

console.log('[stream] Sinch WebSocket connected');

sinchWs.on('message', (raw, isBinary) => {
// Sinch sends two types of messages over the ConnectStream WebSocket:
// 1. An initial JSON text message with metadata (call headers,
// content-type, custom headers from the SVAML response).
// 2. Binary messages containing raw PCM audio frames.
// Note: ws v8 delivers text frames as Buffers too, so check the
// isBinary flag rather than typeof.
if (!isBinary) {
const text = raw.toString();
try {
const metadata = JSON.parse(text);
if (metadata?.callHeaders) {
const header = metadata.callHeaders.find(h => h.key === 'call-id');
if (header) callId = header.value;
}
console.log(`[sinch] Stream metadata — call: ${callId}`);
} catch {
console.warn('[sinch] Unexpected text message:', text.slice(0, 100));
}
return;
}

// Binary frame — raw linear16 PCM audio. Forward directly to Deepgram,
// or buffer it until the Deepgram connection is open.
if (dgReady && dgConnection) {
try { dgConnection.send(raw); } catch {}
} else if (raw.length > 0) {
mediaQueue.push(raw);
}
});

sinchWs.on('close', () => {
console.log(`[stream] Sinch WebSocket closed (call: ${callId})`);
if (dgConnection) {
// finish() sends a CloseStream message so Deepgram flushes any
// buffered audio and closes the connection cleanly.
try { dgConnection.finish(); } catch {}
dgConnection = null;
}
});

sinchWs.on('error', (err) => {
console.error('[stream] Sinch WebSocket error:', err.message);
if (dgConnection) {
try { dgConnection.finish(); } catch {}
dgConnection = null;
}
});

// listen.live() opens the Deepgram WebSocket immediately; any audio
// frames that arrive before the Open event are held in mediaQueue.
dgConnection = deepgram.listen.live(DEEPGRAM_LIVE_OPTIONS);

dgConnection.on(LiveTranscriptionEvents.Open, () => {
console.log('[deepgram] Connection opened');
dgReady = true;
for (const buf of mediaQueue) {
try { dgConnection.send(buf); } catch {}
}
mediaQueue.length = 0;
});

dgConnection.on(LiveTranscriptionEvents.Error, (err) => {
console.error('[deepgram] Error:', err.message || err);
dgReady = false;
});

dgConnection.on(LiveTranscriptionEvents.Close, () => {
console.log('[deepgram] Connection closed');
dgReady = false;
});

// Deepgram sends transcript events here. is_final distinguishes
// partial (interim) results from stable ones. In production you'd
// forward these to a UI, database, or analytics pipeline.
dgConnection.on(LiveTranscriptionEvents.Transcript, (data) => {
const transcript = data?.channel?.alternatives?.[0]?.transcript;
if (transcript) {
const tag = data.is_final ? 'final' : 'interim';
console.log(`[${tag}] ${transcript}`);
}
});
});

app.get('/', (_req, res) => {
res.json({ status: 'ok', service: 'deepgram-sinch-voice-transcription' });
});

return app;
}

if (require.main === module) {
const app = createApp();
app.listen(PORT, () => {
console.log(`Server listening on port ${PORT}`);
console.log(` POST /sinch/ice — Sinch ICE webhook (returns SVAML)`);
console.log(` POST /sinch/ace — Sinch ACE webhook`);
console.log(` POST /sinch/dice — Sinch DiCE webhook`);
console.log(` WS /stream — Sinch audio WebSocket`);
console.log(` GET / — Health check`);
});
}

module.exports = { createApp };