From ec55fdfb9d4d08b23980f2518b362cdb2ba5ad5a Mon Sep 17 00:00:00 2001 From: Chi Date: Wed, 15 Apr 2026 13:50:02 -0700 Subject: [PATCH] web-client: formant-weighted radial waveform for speaking state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stacks on merged PR #338. Makes the 24-bar radial waveform respond to phoneme changes, not just overall amplitude. Top-3 peak bins in the frequency spectrum act as formant proxies (approximating F1/F2/F3); bars within 6 bins of a peak get up to 1.8× their raw height. The ring shape visibly rotates/morphs when the vowel changes ("ahhh" → "eee"), reading as "the avatar is articulating" rather than "a VU meter next to the avatar." No new deps. No CSS changes. No asset changes. ~40 added lines inside the existing `startSpeakingDetection()` closure in src/web-client.ts. Preserves the existing graceful-degradation path: silence → `findPeaks` leaves all peakIdx=-1 → boost=1.0 → identical to prior behavior. Ring stays in the canvas margin (radii 24–30) outside the 44×44 image — proven visible at current display size. Bigger SVG-overlay redesign for the hero screen is tracked as a separate follow-up per the Mini/MacBook two-bot consensus today. Spec at notes/avatar-formant-waveform-spec.md. Research memo: notes/avatar-animation-research.md (Option D). Verified: - npx tsc --noEmit --skipLibCheck clean - Embedded browser-JS extracted and `node --check`-ed cleanly (per feedback_web_client_embedded_js_no_ts rule — no TS syntax inside template literal) Co-Authored-By: Claude Opus 4.6 (1M context) --- src/web-client.ts | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/src/web-client.ts b/src/web-client.ts index 533a7ca..a74fdac 100644 --- a/src/web-client.ts +++ b/src/web-client.ts @@ -882,6 +882,29 @@ function startSpeakingDetection() { var smoothed = 0; var NUM_BARS = 24; var CX = 30, CY = 30, INNER = 24, OUTER = 30; // canvas center and radii + // Formant-weighting: find top-3 peak bins (approximate F1/F2/F3) per frame, + // then boost bars near those bins so the ring shape visibly shifts with + // vowel changes instead of just pulsing louder. + var K = 3; + var peakIdx = new Int16Array(K); + var peakVal = new Uint8Array(K); + function findPeaks() { + for (var k = 0; k < K; k++) { peakIdx[k] = -1; peakVal[k] = 0; } + // Skip bin 0 (DC) and last bin (Nyquist) — noisy, not formant-bearing. + for (var i = 1; i < buf.length - 1; i++) { + if (buf[i] < buf[i - 1] || buf[i] <= buf[i + 1]) continue; + var v = buf[i]; + for (var k = 0; k < K; k++) { + if (v > peakVal[k]) { + for (var j = K - 1; j > k; j--) { + peakVal[j] = peakVal[j - 1]; peakIdx[j] = peakIdx[j - 1]; + } + peakVal[k] = v; peakIdx[k] = i; + break; + } + } + } + } function tick() { speakingRAF = requestAnimationFrame(tick); if (!analyserNode) return; @@ -897,9 +920,22 @@ function startSpeakingDetection() { if (ctx && canvas) { ctx.clearRect(0, 0, canvas.width, canvas.height); if (speaking) { + findPeaks(); var step = buf.length / NUM_BARS; for (var i = 0; i < NUM_BARS; i++) { - var val = buf[Math.floor(i * step)] / 255; + var binIdx = Math.floor(i * step); + var raw = buf[binIdx] / 255; + // Bars within 6 bins of any peak get up to 1.8x their raw height. + // Bars far from any peak stay at their raw value. Fall back to + // pure amplitude (boost=1.0) when no peaks were found (silence). + var minDist = 999; + for (var k = 0; k < K; k++) { + if (peakIdx[k] < 0) continue; + var d = Math.abs(binIdx - peakIdx[k]); + if (d < minDist) minDist = d; + } + var boost = minDist >= 6 ? 1.0 : 1.0 + (1 - minDist / 6) * 0.8; + var val = Math.min(1.0, raw * boost); var barLen = 2 + val * 6; // 2px min, 8px max var angle = (i / NUM_BARS) * Math.PI * 2 - Math.PI / 2; var x1 = CX + Math.cos(angle) * INNER;