+ {hit ? "✓ Correct — " : "✗ Not quite — the answer is "}
+ {correctChoice?.label}
+
+
{q.explain}
+
+ )}
+
+ {phase === "revealed" && (
+
+
+
+ )}
+
+
+ );
+}
diff --git a/apps/web/src/components/play/Sandbox.tsx b/apps/web/src/components/play/Sandbox.tsx
new file mode 100644
index 0000000..d997a5b
--- /dev/null
+++ b/apps/web/src/components/play/Sandbox.tsx
@@ -0,0 +1,297 @@
+"use client";
+
+// Amdahl & Gustafson sandbox — pure-client, no gateway needed.
+// Drag the serial-fraction slider; the chart and metrics update instantly.
+
+import { useState } from "react";
+
+// ── Math helpers ──────────────────────────────────────────────────────────────
+// Amdahl's law: fixed-size speedup on p threads with serial fraction s.
+// With no serial part (s not greater than 0) the speedup is simply p.
+const amdahl = (s: number, p: number): number => {
+  if (!(s > 0)) return p;
+  const parallelShare = (1 - s) / p;
+  return 1 / (s + parallelShare);
+};
+// Gustafson's law: scaled speedup on p threads with serial fraction s.
+const gustafson = (s: number, p: number): number => {
+  const parallelPart = p * (1 - s);
+  return parallelPart + s;
+};
+
+// Picks a round tick spacing (1, 2, 5 or 10 times a power of ten) that gives
+// roughly five ticks between 0 and `max`.
+function niceStep(max: number): number {
+  const raw = max / 5;
+  // Power of ten at or below the raw step; the floor on `raw` keeps log10 finite.
+  const magnitude = Math.pow(10, Math.floor(Math.log10(Math.max(raw, 1e-9))));
+  const frac = raw / magnitude;
+
+  let nice = 10;
+  if (frac < 1.5) nice = 1;
+  else if (frac < 3.5) nice = 2;
+  else if (frac < 7.5) nice = 5;
+
+  return nice * magnitude;
+}
+
+// Compact number formatting for the metrics: fewer decimals as the value grows,
+// a "k" suffix from 10,000 upward, and "∞" for anything non-finite.
+function fmt1(v: number): string {
+  if (!isFinite(v)) return "∞";
+  if (v >= 10000) return `${(v / 1000).toFixed(0)}k`;
+  if (v >= 100) return v.toFixed(0);
+  if (v >= 10) return v.toFixed(1);
+  return v.toFixed(2);
+}
+
+type Mode = "amdahl" | "gustafson" | "both"; // which law(s) the sandbox shows: Amdahl only, Gustafson only, or both
+const MAX_P_OPTS = [8, 24, 64, 256] as const; // NOTE(review): presumably the selectable maximum thread counts — its use is not visible here; confirm
+
+// ── SVG chart ─────────────────────────────────────────────────────────────────
+function SandboxChart({ serial, maxP, mode }: { serial: number; maxP: number; mode: Mode }) { // Speedup-vs-threads chart: precomputes scales, curve paths and ticks for the selected mode.
+ const W = 500, H = 230; // overall drawing width and height
+ const ml = 44, mr = 14, mt = 14, mb = 28; // margins: left, right, top, bottom
+ const pw = W - ml - mr, ph = H - mt - mb; // plot-area width and height inside the margins
+ const logMax = Math.log(maxP); // natural log of the largest thread count; denominator of the X scale
+
+ // Scales. X is logarithmic in the thread count (p below 1 is clamped to 1); Y is linear.
+ const tx = (p: number) => ml + (Math.log(Math.max(p, 1)) / logMax) * pw;
+ const ty = (y: number) => mt + (1 - Math.min(y, yMax * 1.01) / yMax) * ph; // clamps y to 1% above yMax; yMax is assigned below, before ty is first called
+
+ // Largest speedup each law reaches on this chart
+ const aMax = serial > 0 ? 1 / serial : maxP; // Amdahl: ceiling 1/serial, or maxP when nothing is serial
+ const gMax = maxP * (1 - serial) + serial; // Gustafson: speedup at maxP threads
+
+ let yMax: number;
+ if (mode === "gustafson") yMax = gMax * 1.08; // 8% headroom above the Gustafson value
+ else if (mode === "both") yMax = Math.min(Math.max(aMax * 1.5, 4), maxP); // 1.5× the Amdahl ceiling, at least 4, never above maxP
+ else yMax = Math.min(aMax * 1.15, maxP * 1.06); // Amdahl only: 15% above the ceiling, capped at 6% above maxP
+
+ // Sample fn at 100 points spaced evenly in log(p), from p = 1 up to p = maxP
+ const curve = (fn: (p: number) => number) =>
+ Array.from({ length: 100 }, (_, i) => {
+ const p = Math.exp((logMax * i) / 99);
+ return { x: p, y: fn(p) };
+ });
+
+ const aPts = curve((p) => amdahl(serial, p));
+ const gPts = curve((p) => gustafson(serial, p));
+
+ const toSVGPath = (pts: { x: number; y: number }[]) => // "M x,y L x,y …" path string, coordinates rounded to one decimal
+ pts.map((p, i) => `${i === 0 ? "M" : "L"}${tx(p.x).toFixed(1)},${ty(p.y).toFixed(1)}`).join(" ");
+
+ // Ideal line (y = p) from p = 1, ending where it reaches maxP or the chart top, whichever comes first
+ const idealTop = Math.min(maxP, yMax);
+ const idealPath = `M${tx(1).toFixed(1)},${ty(1).toFixed(1)} L${tx(idealTop).toFixed(1)},${ty(idealTop).toFixed(1)}`;
+
+ // X ticks: powers of 2, plus maxP itself when maxP is not a power of 2
+ const xTicks: number[] = [];
+ for (let p = 1; p <= maxP; p *= 2) xTicks.push(p);
+ if (xTicks[xTicks.length - 1] < maxP) xTicks.push(maxP);
+
+ // Y ticks: multiples of a "nice" step, stopping just short of yMax
+ const step = niceStep(yMax);
+ const yTicks: number[] = [];
+ for (let y = step; y < yMax * 0.99; y += step) yTicks.push(y);
+
+ // Amdahl asymptote: only in Amdahl/both modes, with a serial part, and when 1/serial sits below the chart top
+ const showAsymp = serial > 0 && aMax < yMax * 0.97 && (mode === "amdahl" || mode === "both");
+
+ const cy = mt + ph / 2; // vertical midpoint of the plot area; NOTE(review): not referenced in the visible code
+
+ return (
+
+ );
+}
+
+// ── Main component ────────────────────────────────────────────────────────────
+/**
+ * Interactive Amdahl & Gustafson sandbox.
+ *
+ * Keeps the serial fraction, the thread count and the selected law in component
+ * state, derives the headline metrics from them, and builds the plain-English
+ * summary that is shown when neither Gustafson nor "both" mode is selected.
+ */
+export default function Sandbox() {
+ const [serial, setSerial] = useState(0.20);
+ const [maxP, setMaxP] = useState(24);
+ const [mode, setMode] = useState("amdahl");
+
+ // Amdahl ceiling (speedup as threads grow without bound); unbounded when nothing is serial.
+ const aMax = serial > 0 ? 1 / serial : Infinity;
+ const spAmdahl = amdahl(serial, maxP);
+ const spGust = gustafson(serial, maxP);
+ // Parallel efficiency: achieved speedup per thread (1 = perfect scaling).
+ const effAmdahl = spAmdahl / maxP;
+
+ // Threads needed to reach 90% of Amdahl ceiling: n = 9*(1-s)/s
+ const nFor90 = serial > 0 ? Math.ceil(9 * (1 - serial) / serial) : Infinity;
+
+ // Picks one explanation for the current serial fraction and thread count.
+ // Branches are checked from the most to the least restrictive case.
+ function summaryText(): string {
+ if (serial === 0)
+ return "Zero serial code — every thread does only useful work. Speedup equals core count: doubling cores halves the runtime forever. Real programs always have some serial fraction, but this is the target.";
+ // This branch also covers exactly 50% serial, so the wording says "at least half".
+ if (serial >= 0.5)
+ return "At least half the program is serial. The ceiling is ≤ 2× no matter how many cores you throw at it. More hardware cannot fix a serial bottleneck — you must parallelize the code first.";
+ if (aMax < 4)
+ return `With ${(serial*100).toFixed(0)}% serial code the ceiling is only ${fmt1(aMax)}×. Adding a hundred more cores won't help. This is a code problem, not a hardware problem.`;
+ if (nFor90 <= maxP)
+ return `You need ${nFor90} threads to reach 90% of the ${fmt1(aMax)}× ceiling — achievable on your ${maxP}-core system. You're spending hardware budget well.`;
+ // Share of the ceiling actually reached: speedup ÷ ceiling. This is not the same as
+ // effAmdahl (speedup ÷ threads). aMax is finite here because serial > 0.
+ const ceilingPct = (spAmdahl / aMax) * 100;
+ return `The ${fmt1(aMax)}× ceiling is attractive, but you'd need ${fmt1(nFor90)} threads to reach 90% of it. On ${maxP} cores you realise ${fmt1(spAmdahl)}× (${ceilingPct.toFixed(0)}% of ceiling). Reducing the serial fraction from ${(serial*100).toFixed(0)}% to even ${(serial*50).toFixed(1)}% would double the ceiling.`;
+ }
+
+ const MODES: { id: Mode; label: string }[] = [
+ { id: "amdahl", label: "Amdahl" },
+ { id: "gustafson", label: "Gustafson" },
+ { id: "both", label: "Both" },
+ ];
+
+ // Traffic-light colour for the serial fraction: above 20% bad, above 5% warn, otherwise good.
+ const sColor = serial > 0.2 ? "var(--bad)" : serial > 0.05 ? "var(--warn)" : "var(--good)";
+
+ return (
+
+
+
Amdahl & Gustafson — interactive sandbox
+
+ Set the serial fraction and watch the theoretical speedup ceiling update. Switch to Gustafson's law to see how scaling the problem with the hardware changes the picture entirely.
+
+ Speedup at {maxP} threads (Gustafson)
+ {fmt1(spGust)}×
+
+ )}
+
+
+ {/* ── Plain-English explanation ── */}
+
+
+ {mode === "gustafson" ? "Gustafson's view" : mode === "both" ? "The key difference" : "What this means"}
+
+
+ {mode === "gustafson"
+ ? <>Gustafson's law says: if you scale the problem size with the hardware (more threads → bigger dataset, same wall time), speedup grows near-linearly regardless of the serial fraction. It doesn't contradict Amdahl — they answer different questions. Amdahl: “how much faster for the same work?” Gustafson: “how much more work in the same time?”>
+ : mode === "both"
+ ? <>The gap between the curves is the scalability premium from growing the problem with hardware. When work is fixed (Amdahl), the serial fraction is an iron ceiling. When the problem scales (Gustafson), even significant serial fractions allow near-linear throughput growth — which is why clusters with thousands of nodes are useful in practice.>
+ : summaryText()
+ }
+
+
+
+
+ );
+}
diff --git a/apps/web/src/components/play/quiz-questions.ts b/apps/web/src/components/play/quiz-questions.ts
new file mode 100644
index 0000000..df0d296
--- /dev/null
+++ b/apps/web/src/components/play/quiz-questions.ts
@@ -0,0 +1,146 @@
+// Guess-the-Bottleneck: six archetypal scaling curves the student must identify.
+// Each entry is self-contained: curve data + choices + answer + explanation.
+// No gateway needed — all data is pre-generated from analytical models.
+
+export type Choice = { id: string; label: string }; // one selectable answer: `id` is the value a Question's `answer` refers to, `label` is the text shown
+export type Question = { // one quiz item: a scaling curve plus its multiple-choice answers
+ id: string; // identifier of the question
+ prompt: string; // the question text
+ context: string; // description of the scenario that produced the curve
+ curve: { x: number; y: number }[]; // data points; in QUESTIONS x is the thread count and y the speedup
+ choices: Choice[]; // the answer options offered
+ answer: string; // `id` of the correct entry in `choices`
+ explain: string; // explanation of the correct answer
+};
+
+const T = [1, 2, 4, 8, 12, 16, 20, 24] as const; // thread counts used as the x values of the generated curves
+
+// Amdahl's law, S(p) = 1 / (s + (1 - s) / p), evaluated at every thread count
+// in T for serial fraction s; each speedup is rounded to two decimals.
+const ahl = (s: number): { x: number; y: number }[] =>
+  T.map((threads) => {
+    const speedup = 1 / (s + (1 - s) / threads);
+    return { x: threads, y: Number(speedup.toFixed(2)) };
+  });
+
+export const QUESTIONS: Question[] = [ // quiz bank: six entries, each with four choices; `answer` names the correct choice id
+ // ── 1. Near-perfect ────────────────────────────────────────────────────────
+ {
+ id: "perfect",
+ prompt: "What does this scaling curve tell you?",
+ context:
+ "24 independent tasks — each thread works on a completely separate slice of a large array and never touches any shared data.",
+ curve: T.map((p) => ({ x: p, y: +(p * (1 - 0.002 * (p - 1))).toFixed(2) })), // speedup p·(1 − 0.002·(p − 1)): efficiency drops 0.2% per extra thread
+ choices: [
+ { id: "perfect", label: "Near-perfect (embarrassingly parallel)" },
+ { id: "amdahl", label: "Serial fraction — Amdahl's law" },
+ { id: "bw", label: "Memory bandwidth saturation" },
+ { id: "sync", label: "Synchronization contention" },
+ ],
+ answer: "perfect",
+ explain:
+ "Near-linear speedup — every core does proportional useful work, with only minor scheduling overhead. No shared state, no synchronization, and the working set still fits in cache. This is the gold standard every parallel programmer aims for.",
+ },
+
+ // ── 2. Synchronization contention ──────────────────────────────────────────
+ {
+ id: "sync",
+ prompt: "What does this scaling curve tell you?",
+ context:
+ "All 24 threads increment a single shared global counter on every loop iteration, each acquisition guarded by #pragma omp atomic.",
+ curve: [ // literal points: peaks at 1.04× on 2 threads, then falls below 1×
+ { x: 1, y: 1.00 }, { x: 2, y: 1.04 }, { x: 4, y: 0.96 },
+ { x: 8, y: 0.84 }, { x: 12, y: 0.78 }, { x: 16, y: 0.74 },
+ { x: 20, y: 0.71 }, { x: 24, y: 0.69 },
+ ],
+ choices: [
+ { id: "falsesh", label: "False sharing" },
+ { id: "sync", label: "Synchronization contention" },
+ { id: "imbal", label: "Load imbalance" },
+ { id: "bw", label: "Memory bandwidth saturation" },
+ ],
+ answer: "sync",
+ explain:
+ "Every core must queue for the lock on every iteration — queuing time grows faster than the work done. 24 threads on one atomic counter ends up slower than 1 thread alone. The fix is a reduction: each thread keeps a private partial sum and OpenMP combines them once at the end.",
+ },
+
+ // ── 3. Amdahl's law ────────────────────────────────────────────────────────
+ {
+ id: "amdahl",
+ prompt: "What does this scaling curve tell you?",
+ context:
+ "The kernel has a setup phase that always runs on a single thread (file I/O + initialisation), then a fully parallel compute section. Past ~8 threads the speedup barely improves.",
+ curve: ahl(0.10), // Amdahl curve generated with a 10% serial fraction
+ choices: [
+ { id: "bw", label: "Memory bandwidth saturation" },
+ { id: "amdahl", label: "Serial fraction — Amdahl's law" },
+ { id: "imbal", label: "Load imbalance" },
+ { id: "perfect", label: "Near-perfect scaling" },
+ ],
+ answer: "amdahl",
+ explain:
+ "The curve follows Amdahl's law with ~10% serial code: S(N) = 1 ∕ (0.10 + 0.90∕N). No matter how many cores you add, the serial 10% dominates — the theoretical ceiling is 10× regardless of core count. The only fix is to parallelize or eliminate the serial section.",
+ },
+
+ // ── 4. False sharing ───────────────────────────────────────────────────────
+ {
+ id: "falsesh",
+ prompt: "What does this scaling curve tell you?",
+ context:
+ "Each thread writes only to its own counter — no logical sharing — but all counters are packed next to each other inside one 64-byte cache line.",
+ curve: [ // literal points: rises to a 2.48× peak at 8 threads, then declines
+ { x: 1, y: 1.00 }, { x: 2, y: 1.62 }, { x: 4, y: 2.18 },
+ { x: 8, y: 2.48 }, { x: 12, y: 2.31 }, { x: 16, y: 2.09 },
+ { x: 20, y: 1.91 }, { x: 24, y: 1.74 },
+ ],
+ choices: [
+ { id: "amdahl", label: "Serial fraction — Amdahl's law" },
+ { id: "falsesh", label: "False sharing" },
+ { id: "perfect", label: "Near-perfect scaling" },
+ { id: "sync", label: "Synchronization contention" },
+ ],
+ answer: "falsesh",
+ explain:
+ "Initial gains look promising, but every write by one core invalidates the same cache line in every other core's L1 cache (MESI protocol). As thread count grows, this coherence traffic escalates and the speedup reverses. The fingerprint is a peak followed by degradation. Fix: pad each counter to its own cache line.",
+ },
+
+ // ── 5. Memory bandwidth saturation ────────────────────────────────────────
+ {
+ id: "bw",
+ prompt: "What does this scaling curve tell you?",
+ context:
+ "A memory-intensive stencil kernel. Each thread streams through a large array — the working set is far too big to fit in any level of cache.",
+ curve: [ // literal points: flattens just under 4× from 8 threads onward
+ { x: 1, y: 1.00 }, { x: 2, y: 1.88 }, { x: 4, y: 3.45 },
+ { x: 8, y: 3.78 }, { x: 12, y: 3.90 }, { x: 16, y: 3.94 },
+ { x: 20, y: 3.97 }, { x: 24, y: 3.98 },
+ ],
+ choices: [
+ { id: "imbal", label: "Load imbalance" },
+ { id: "sync", label: "Synchronization contention" },
+ { id: "bw", label: "Memory bandwidth saturation" },
+ { id: "amdahl", label: "Serial fraction — Amdahl's law" },
+ ],
+ answer: "bw",
+ explain:
+ "DRAM bandwidth is shared across all cores — once ~3–4 cores saturate the memory bus, extra cores idle waiting for data. The flat ceiling at ~4× is the bandwidth wall, not a code quality issue. The Roofline model predicts exactly this. Fix: tiling or prefetching to reuse data from cache before adding more threads.",
+ },
+
+ // ── 6. Load imbalance ─────────────────────────────────────────────────────
+ {
+ id: "imbal",
+ prompt: "What does this scaling curve tell you?",
+ context:
+ "A triangular loop: iteration i costs O(i²) work. The N iterations are divided into equal-sized static chunks — the last threads get the heaviest iterations.",
+ curve: [ // literal points: keeps climbing but levels off near 5.6×
+ { x: 1, y: 1.00 }, { x: 2, y: 1.71 }, { x: 4, y: 2.85 },
+ { x: 8, y: 4.22 }, { x: 12, y: 5.10 }, { x: 16, y: 5.52 },
+ { x: 20, y: 5.63 }, { x: 24, y: 5.65 },
+ ],
+ choices: [
+ { id: "bw", label: "Memory bandwidth saturation" },
+ { id: "falsesh", label: "False sharing" },
+ { id: "amdahl", label: "Serial fraction — Amdahl's law" },
+ { id: "imbal", label: "Load imbalance" },
+ ],
+ answer: "imbal",
+ explain:
+ "With schedule(static), the last threads receive the heaviest iterations. Every other thread finishes and idles while the unlucky ones grind through the expensive tail — total runtime equals the slowest thread. Fix: schedule(dynamic) or schedule(guided) lets idle threads steal remaining work and balances the load.",
+ },
+];
diff --git a/docs/02-roadmap.md b/docs/02-roadmap.md
index f4a088b..b520072 100644
--- a/docs/02-roadmap.md
+++ b/docs/02-roadmap.md
@@ -137,12 +137,11 @@ the Slurm bridge for real clusters. These bare-metal/cloud nodes also unlock **t
**DoD:** an instructor runs a class through a "speed up this code" challenge backed by real
cluster execution.
-**Status (2026-06-17) — gamification layer partially landed.** The `/play` hub, head-to-head
-kernel race, predict-before-you-run mechanic (across Start / Playground / Flagship), and a
-badge + streak system for correct predictions are built and merged. A guess-the-bottleneck quiz
-and an Amdahl & Gustafson interactive sandbox are in the next PR (`feat/badges-pr30`).
-Engineering domain modules (FEM/FDTD/CFD), classrooms/LMS, K8s autoscaling, and PMU counters
-remain future work.
+**Status (2026-06-17) — gamification layer complete.** The `/play` hub ships four games:
+**⚡ Race** (head-to-head kernel comparison), **🧩 Quiz** (guess-the-bottleneck),
+**🔬 Sandbox** (Amdahl & Gustafson interactive explorer), and **🏅 Badges**
+(predict-before-you-run reward system across Start / Playground / Flagship). Engineering
+domain modules (FEM/FDTD/CFD), classrooms/LMS, K8s autoscaling, and PMU counters remain future work.
---
diff --git a/packages/explain/src/index.ts b/packages/explain/src/index.ts
index d0e1c97..82f6ce2 100644
--- a/packages/explain/src/index.ts
+++ b/packages/explain/src/index.ts
@@ -10,8 +10,8 @@ type ExplainFn = (r: ExperimentResult) => Explanation;
export const Explain: Record = {
falseSharing(r) {
- const c = r.current, peak = r.peak!, padded = r.params.padded;
- if (padded)
+ const c = r.current, peak = r.peak, padded = r.params.padded;
+ if (padded || !peak)
return {
sev: "info",
what: `Padded: speedup reaches ${fmt(c.speedup, 1)}× at ${c.x} threads — close to the ideal ${c.x}×.`,
@@ -22,7 +22,7 @@ export const Explain: Record = {
return {
sev: "critical",
what: `Speedup peaks at only ${fmt(peak.speedup, 1)}× around ${peak.x} threads, then falls to ${fmt(c.speedup, 1)}× at ${c.x}. Adding threads makes it slower.`,
- why: `All ${c.x} counters share one 64-byte cache line. Every increment invalidates that line in every other core, so it ping-pongs across cores over the bus — coherence traffic grows ~linearly with thread count and now dominates (${fmt(r.coh!, 0)} ms of the ${fmt(c.time, 0)} ms runtime).`,
+ why: `All ${c.x} counters share one 64-byte cache line. Every increment invalidates that line in every other core, so it ping-pongs across cores over the bus — coherence traffic grows ~linearly with thread count and now dominates (${fmt(r.coh ?? 0, 0)} ms of the ${fmt(c.time, 0)} ms runtime).`,
how: `Pad/align each thread's counter to its own cache line (64-byte alignment, or per-thread padded struct). Flip "Pad to cache line".`,
exp: `Near-linear speedup restored (~${c.x}× at ${c.x} threads); coherence stall → ~0%.`,
};
@@ -82,8 +82,8 @@ export const Explain: Record = {
};
},
mpiHalo(r) {
- const c = r.current, weak = r.params.mode === "weak", commPct = r.idlePct ?? 0, peak = r.peak!;
- if (weak)
+ const c = r.current, weak = r.params.mode === "weak", commPct = r.idlePct ?? 0, peak = r.peak;
+ if (weak || !peak)
return {
sev: c.efficiency > 0.7 ? "info" : "warn",
what: `Weak scaling: as the grid grows with the ranks, scaled speedup reaches ${fmt(c.speedup, 1)}× on ${c.x} ranks at ${fmt(c.efficiency * 100, 0)}% efficiency — close to ideal.`,