From 5fe4e7e1fa458a8be3c366f02df2ae86c176fc5d Mon Sep 17 00:00:00 2001 From: examples-bot Date: Thu, 2 Apr 2026 13:03:45 +0000 Subject: [PATCH 1/2] =?UTF-8?q?feat(examples):=20add=20260=20=E2=80=94=20W?= =?UTF-8?q?hatsApp=20Business=20voice=20message=20transcription=20(Node.js?= =?UTF-8?q?)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../.env.example | 7 + .../README.md | 65 +++++ .../package.json | 18 ++ .../src/server.js | 249 ++++++++++++++++++ .../tests/test.js | 85 ++++++ 5 files changed, 424 insertions(+) create mode 100644 examples/260-whatsapp-voice-transcription-node/.env.example create mode 100644 examples/260-whatsapp-voice-transcription-node/README.md create mode 100644 examples/260-whatsapp-voice-transcription-node/package.json create mode 100644 examples/260-whatsapp-voice-transcription-node/src/server.js create mode 100644 examples/260-whatsapp-voice-transcription-node/tests/test.js diff --git a/examples/260-whatsapp-voice-transcription-node/.env.example b/examples/260-whatsapp-voice-transcription-node/.env.example new file mode 100644 index 0000000..d6aa43e --- /dev/null +++ b/examples/260-whatsapp-voice-transcription-node/.env.example @@ -0,0 +1,7 @@ +# Deepgram — https://console.deepgram.com/ +DEEPGRAM_API_KEY= + +# WhatsApp Business Cloud API — https://developers.facebook.com/apps/ +WHATSAPP_TOKEN= +WHATSAPP_VERIFY_TOKEN= +WHATSAPP_PHONE_NUMBER_ID= diff --git a/examples/260-whatsapp-voice-transcription-node/README.md b/examples/260-whatsapp-voice-transcription-node/README.md new file mode 100644 index 0000000..d7ab84f --- /dev/null +++ b/examples/260-whatsapp-voice-transcription-node/README.md @@ -0,0 +1,65 @@ +# WhatsApp Business — Voice Message Transcription with Deepgram + +A Node.js webhook server that receives WhatsApp voice messages via the WhatsApp Business Cloud API, transcribes them with Deepgram nova-3, and replies with the transcript. 
Ideal for customer service bots, compliance logging, and accessibility tools in WhatsApp-first markets. + +## What you'll build + +An Express server that acts as a WhatsApp Business webhook. When a user sends a voice note, the server downloads the audio from Meta's CDN, sends it to Deepgram's pre-recorded speech-to-text API, and replies to the sender with the transcript — all within seconds. + +## Prerequisites + +- Node.js 18+ +- Deepgram account — [get a free API key](https://console.deepgram.com/) +- Meta Business account with WhatsApp Business API access — [get started](https://developers.facebook.com/docs/whatsapp/cloud-api/get-started) + +## Environment variables + +| Variable | Where to find it | +|----------|-----------------| +| `DEEPGRAM_API_KEY` | [Deepgram console](https://console.deepgram.com/) | +| `WHATSAPP_TOKEN` | [Meta App Dashboard](https://developers.facebook.com/apps/) → your app → WhatsApp → API Setup → Temporary access token (or a permanent System User token) | +| `WHATSAPP_VERIFY_TOKEN` | A secret string you choose — enter it in both your `.env` and the Meta webhook configuration | +| `WHATSAPP_PHONE_NUMBER_ID` | [Meta App Dashboard](https://developers.facebook.com/apps/) → your app → WhatsApp → API Setup → Phone number ID | + +Copy `.env.example` to `.env` and fill in your values. + +## Install and run + +```bash +npm install +npm start +``` + +The server starts on port 3000 (override with `PORT` env var). You'll need a public URL for Meta's webhook — use [ngrok](https://ngrok.com/) for local development: + +```bash +ngrok http 3000 +``` + +Then configure the webhook in the Meta App Dashboard: +1. Go to your app → WhatsApp → Configuration +2. Set the Callback URL to `https://YOUR_NGROK_DOMAIN/webhook` (the public HTTPS host ngrok printed above) +3. Set the Verify Token to your `WHATSAPP_VERIFY_TOKEN` value +4. 
Subscribe to the `messages` webhook field + +## Key parameters + +| Parameter | Value | Description | +|-----------|-------|-------------| +| `model` | `nova-3` | General-purpose STT model with best accuracy | +| `smart_format` | `true` | Adds punctuation, capitalisation, and number formatting | +| `detect_language` | `true` | Auto-detects the spoken language — useful for international WhatsApp users | + +## How it works + +1. Meta sends a POST to `/webhook` when a WhatsApp message arrives +2. The server filters for `audio` type messages (voice notes, audio attachments) +3. It retrieves the media download URL from Meta's Graph API using the media ID +4. The audio file is downloaded from Meta's CDN (Opus/OGG format for voice notes) +5. The audio buffer is sent to Deepgram's `transcribeFile()` pre-recorded API +6. The transcript is sent back to the user as a WhatsApp text reply +7. The server responds 200 immediately to avoid Meta's webhook retry logic + +## Starter templates + +[deepgram-starters](https://github.com/orgs/deepgram-starters/repositories) diff --git a/examples/260-whatsapp-voice-transcription-node/package.json b/examples/260-whatsapp-voice-transcription-node/package.json new file mode 100644 index 0000000..e451c0e --- /dev/null +++ b/examples/260-whatsapp-voice-transcription-node/package.json @@ -0,0 +1,18 @@ +{ + "name": "deepgram-whatsapp-voice-transcription-node", + "version": "1.0.0", + "description": "WhatsApp webhook server that transcribes voice messages using Deepgram nova-3", + "main": "src/server.js", + "scripts": { + "start": "node src/server.js", + "test": "node tests/test.js" + }, + "dependencies": { + "@deepgram/sdk": "5.0.0", + "dotenv": "^16.4.0", + "express": "^4.21.0" + }, + "engines": { + "node": ">=18" + } +} diff --git a/examples/260-whatsapp-voice-transcription-node/src/server.js b/examples/260-whatsapp-voice-transcription-node/src/server.js new file mode 100644 index 0000000..ff3344f --- /dev/null +++ 
b/examples/260-whatsapp-voice-transcription-node/src/server.js @@ -0,0 +1,249 @@ +'use strict'; + +// WhatsApp Business Cloud API webhook server that transcribes incoming voice +// messages using Deepgram nova-3 and replies with the transcript. +// +// Architecture: +// 1. Meta sends webhook events to this server when a WhatsApp message arrives +// 2. We filter for audio messages (voice notes, audio attachments) +// 3. Download the media from Meta's CDN using the WhatsApp token +// 4. Send the audio buffer to Deepgram's pre-recorded STT API +// 5. Reply to the sender with the transcript via the WhatsApp Cloud API + +require('dotenv').config(); + +const express = require('express'); +const https = require('https'); +const { DeepgramClient } = require('@deepgram/sdk'); + +const PORT = process.env.PORT || 3000; + +const REQUIRED_ENV = [ + 'DEEPGRAM_API_KEY', + 'WHATSAPP_TOKEN', + 'WHATSAPP_VERIFY_TOKEN', + 'WHATSAPP_PHONE_NUMBER_ID', +]; + +const missing = REQUIRED_ENV.filter(k => !process.env[k]); +if (missing.length > 0) { + console.error(`Error: missing environment variables: ${missing.join(', ')}`); + console.error('Copy .env.example to .env and fill in your values.'); + process.exit(1); +} + +const deepgram = new DeepgramClient({ apiKey: process.env.DEEPGRAM_API_KEY }); +const app = express(); + +// Meta sends webhook payloads as JSON. +app.use(express.json()); + +// ── Webhook verification ───────────────────────────────────────────────────── +// Meta verifies ownership of the webhook URL by sending a GET with a challenge. +// This must respond with the challenge value when hub.verify_token matches +// the token you configured in the Meta App Dashboard. 
+app.get('/webhook', (req, res) => { + const mode = req.query['hub.mode']; + const token = req.query['hub.verify_token']; + const challenge = req.query['hub.challenge']; + + if (mode === 'subscribe' && token === process.env.WHATSAPP_VERIFY_TOKEN) { + console.log('Webhook verified'); + return res.status(200).send(challenge); + } + return res.sendStatus(403); +}); + +// ── Incoming messages ──────────────────────────────────────────────────────── +app.post('/webhook', async (req, res) => { + // Respond 200 immediately — Meta retries if the webhook doesn't ack within + // a few seconds, which would cause duplicate processing. + res.sendStatus(200); + + try { + const entry = req.body?.entry?.[0]; + const changes = entry?.changes?.[0]; + const value = changes?.value; + + if (!value?.messages) return; + + for (const message of value.messages) { + // WhatsApp voice notes have type "audio". Ignore text, image, etc. + if (message.type !== 'audio') continue; + + const from = message.from; + const mediaId = message.audio?.id; + if (!mediaId) continue; + + console.log(`Voice message from ${from} (media: ${mediaId})`); + + try { + const audioBuffer = await downloadWhatsAppMedia(mediaId); + const transcript = await transcribeAudio(audioBuffer); + + if (!transcript) { + await sendWhatsAppMessage(from, 'No speech detected in your voice message.'); + continue; + } + + // WhatsApp messages cap at 4096 characters — truncate if needed. + const reply = transcript.length > 4000 + ? transcript.substring(0, 4000) + '...' + : transcript; + + await sendWhatsAppMessage(from, `Transcript:\n\n${reply}`); + console.log(`Transcript sent to ${from} (${transcript.length} chars)`); + } catch (err) { + console.error(`Failed to process voice message from ${from}:`, err.message); + await sendWhatsAppMessage( + from, + 'Sorry, I could not transcribe that voice message. Please try again.' 
+ ).catch(() => {}); + } + } + } catch (err) { + console.error('Webhook processing error:', err.message); + } +}); + +// ── Download media from WhatsApp ───────────────────────────────────────────── +// Two-step process: first get the media URL from the media ID, then download +// the actual file. Both requests need the WhatsApp token for auth. +async function downloadWhatsAppMedia(mediaId) { + // Step 1: Retrieve the download URL from Meta's Graph API. + const mediaInfo = await graphApiGet( + `https://graph.facebook.com/v21.0/${mediaId}`, + process.env.WHATSAPP_TOKEN + ); + const mediaUrl = JSON.parse(mediaInfo).url; + + if (!mediaUrl) { + throw new Error('No download URL in media info response'); + } + + // Step 2: Download the actual audio bytes. + // Meta's CDN URL is short-lived (~5 minutes) — download immediately. + return downloadBuffer(mediaUrl, process.env.WHATSAPP_TOKEN); +} + +function graphApiGet(url, token) { + return new Promise((resolve, reject) => { + const parsedUrl = new URL(url); + const options = { + hostname: parsedUrl.hostname, + path: parsedUrl.pathname + parsedUrl.search, + headers: { Authorization: `Bearer ${token}` }, + }; + https.get(options, (res) => { + if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + return graphApiGet(res.headers.location, token).then(resolve, reject); + } + if (res.statusCode !== 200) { + const chunks = []; + res.on('data', c => chunks.push(c)); + res.on('end', () => reject(new Error( + `HTTP ${res.statusCode} from Graph API: ${Buffer.concat(chunks).toString()}` + ))); + return; + } + const chunks = []; + res.on('data', c => chunks.push(c)); + res.on('end', () => resolve(Buffer.concat(chunks).toString())); + res.on('error', reject); + }).on('error', reject); + }); +} + +function downloadBuffer(url, token) { + return new Promise((resolve, reject) => { + const parsedUrl = new URL(url); + const options = { + hostname: parsedUrl.hostname, + path: parsedUrl.pathname + parsedUrl.search, + 
headers: { Authorization: `Bearer ${token}` }, + }; + https.get(options, (res) => { + if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + return downloadBuffer(res.headers.location, token).then(resolve, reject); + } + if (res.statusCode !== 200) { + return reject(new Error(`HTTP ${res.statusCode} downloading media`)); + } + const chunks = []; + res.on('data', c => chunks.push(c)); + res.on('end', () => resolve(Buffer.concat(chunks))); + res.on('error', reject); + }).on('error', reject); + }); +} + +// ── Transcribe with Deepgram ───────────────────────────────────────────────── +async function transcribeAudio(buffer) { + // SDK v5: transcribeFile() accepts a Buffer directly with flat options. + // WhatsApp voice notes are Opus-encoded in an OGG container — Deepgram + // auto-detects the codec so no explicit encoding param is needed. + const data = await deepgram.listen.v1.media.transcribeFile(buffer, { + model: 'nova-3', + smart_format: true, + // Auto-detect the spoken language — useful for international WhatsApp users. 
+ // Access via: data.results.channels[0].detected_language + detect_language: true, + tag: 'deepgram-examples', + }); + + // data.results.channels[0].alternatives[0].transcript + const transcript = data?.results?.channels?.[0]?.alternatives?.[0]?.transcript; + if (!transcript || transcript.trim().length === 0) { + return null; + } + return transcript; +} + +// ── Send reply via WhatsApp Cloud API ──────────────────────────────────────── +function sendWhatsAppMessage(to, body) { + return new Promise((resolve, reject) => { + const payload = JSON.stringify({ + messaging_product: 'whatsapp', + to, + type: 'text', + text: { body }, + }); + + const options = { + hostname: 'graph.facebook.com', + path: `/v21.0/${process.env.WHATSAPP_PHONE_NUMBER_ID}/messages`, + method: 'POST', + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${process.env.WHATSAPP_TOKEN}`, + 'Content-Length': Buffer.byteLength(payload), + }, + }; + + const req = https.request(options, (res) => { + const chunks = []; + res.on('data', c => chunks.push(c)); + res.on('end', () => { + if (res.statusCode >= 200 && res.statusCode < 300) { + resolve(Buffer.concat(chunks).toString()); + } else { + reject(new Error( + `WhatsApp send failed (HTTP ${res.statusCode}): ${Buffer.concat(chunks).toString()}` + )); + } + }); + res.on('error', reject); + }); + + req.on('error', reject); + req.write(payload); + req.end(); + }); +} + +app.listen(PORT, () => { + console.log(`WhatsApp voice transcription server running on port ${PORT}`); + console.log('Configure your Meta webhook URL to point to /webhook on this server.'); +}); + +module.exports = { app, transcribeAudio, sendWhatsAppMessage }; diff --git a/examples/260-whatsapp-voice-transcription-node/tests/test.js b/examples/260-whatsapp-voice-transcription-node/tests/test.js new file mode 100644 index 0000000..b8cbd90 --- /dev/null +++ b/examples/260-whatsapp-voice-transcription-node/tests/test.js @@ -0,0 +1,85 @@ +'use strict'; + +const fs = 
require('fs'); +const path = require('path'); + +// ── Credential check ───────────────────────────────────────────────────────── +// Exit code convention across all examples in this repo: +// 0 = all tests passed +// 1 = real test failure (code bug, assertion error, unexpected API response) +// 2 = missing credentials (expected in CI until secrets are configured) +const envExample = path.join(__dirname, '..', '.env.example'); +const required = fs.readFileSync(envExample, 'utf8') + .split('\n') + .filter(l => /^[A-Z][A-Z0-9_]+=/.test(l.trim())) + .map(l => l.split('=')[0].trim()); + +const missing = required.filter(k => !process.env[k]); +if (missing.length > 0) { + console.error(`MISSING_CREDENTIALS: ${missing.join(',')}`); + process.exit(2); +} +// ───────────────────────────────────────────────────────────────────────────── + +// We can't run the full WhatsApp webhook server in CI (needs Meta webhook +// verification and real WhatsApp messages), but we CAN verify: +// 1. Deepgram API key works for STT (pre-recorded file transcription) +// 2. Express is installed and importable + +const { DeepgramClient } = require('@deepgram/sdk'); + +const KNOWN_AUDIO_URL = 'https://dpgr.am/spacewalk.wav'; +const EXPECTED_WORDS = ['spacewalk', 'astronaut', 'nasa']; + +async function testDeepgramSTT() { + console.log('Testing Deepgram pre-recorded STT (nova-3)...'); + + const deepgram = new DeepgramClient({ apiKey: process.env.DEEPGRAM_API_KEY }); + + // SDK v5: flat single options object, throws on error. 
+ const data = await deepgram.listen.v1.media.transcribeUrl({ + url: KNOWN_AUDIO_URL, + model: 'nova-3', + smart_format: true, + tag: 'deepgram-examples', + }); + + const transcript = data?.results?.channels?.[0]?.alternatives?.[0]?.transcript; + + if (!transcript || transcript.length < 20) { + throw new Error(`Transcript too short or empty: "${transcript}"`); + } + + const lower = transcript.toLowerCase(); + const found = EXPECTED_WORDS.filter(w => lower.includes(w)); + if (found.length === 0) { + throw new Error(`Expected words not found in: "${transcript.substring(0, 200)}"`); + } + + console.log(` Transcript received (${transcript.length} chars)`); + console.log(` Expected content verified (found: ${found.join(', ')})`); +} + +function testExpressInstalled() { + console.log('Testing express import...'); + const express = require('express'); + if (typeof express !== 'function') { + throw new Error('express default export is not a function'); + } + console.log(' express loaded successfully'); +} + +async function run() { + testExpressInstalled(); + await testDeepgramSTT(); +} + +run() + .then(() => { + console.log('\nAll tests passed'); + process.exit(0); + }) + .catch(err => { + console.error(`\nTest failed: ${err.message}`); + process.exit(1); + }); From f80595f40e0f6c841b03326fd67dc84d0a1aca14 Mon Sep 17 00:00:00 2001 From: Luke Oliff Date: Thu, 2 Apr 2026 14:17:50 +0100 Subject: [PATCH 2/2] fix(examples): renumber 330-whatsapp-voice-transcription-node to avoid collision with concurrent builds --- .../.env.example | 0 .../README.md | 0 .../package.json | 0 .../src/server.js | 0 .../tests/test.js | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename examples/{260-whatsapp-voice-transcription-node => 330-whatsapp-voice-transcription-node}/.env.example (100%) rename examples/{260-whatsapp-voice-transcription-node => 330-whatsapp-voice-transcription-node}/README.md (100%) rename examples/{260-whatsapp-voice-transcription-node => 
330-whatsapp-voice-transcription-node}/package.json (100%) rename examples/{260-whatsapp-voice-transcription-node => 330-whatsapp-voice-transcription-node}/src/server.js (100%) rename examples/{260-whatsapp-voice-transcription-node => 330-whatsapp-voice-transcription-node}/tests/test.js (100%) diff --git a/examples/260-whatsapp-voice-transcription-node/.env.example b/examples/330-whatsapp-voice-transcription-node/.env.example similarity index 100% rename from examples/260-whatsapp-voice-transcription-node/.env.example rename to examples/330-whatsapp-voice-transcription-node/.env.example diff --git a/examples/260-whatsapp-voice-transcription-node/README.md b/examples/330-whatsapp-voice-transcription-node/README.md similarity index 100% rename from examples/260-whatsapp-voice-transcription-node/README.md rename to examples/330-whatsapp-voice-transcription-node/README.md diff --git a/examples/260-whatsapp-voice-transcription-node/package.json b/examples/330-whatsapp-voice-transcription-node/package.json similarity index 100% rename from examples/260-whatsapp-voice-transcription-node/package.json rename to examples/330-whatsapp-voice-transcription-node/package.json diff --git a/examples/260-whatsapp-voice-transcription-node/src/server.js b/examples/330-whatsapp-voice-transcription-node/src/server.js similarity index 100% rename from examples/260-whatsapp-voice-transcription-node/src/server.js rename to examples/330-whatsapp-voice-transcription-node/src/server.js diff --git a/examples/260-whatsapp-voice-transcription-node/tests/test.js b/examples/330-whatsapp-voice-transcription-node/tests/test.js similarity index 100% rename from examples/260-whatsapp-voice-transcription-node/tests/test.js rename to examples/330-whatsapp-voice-transcription-node/tests/test.js