diff --git a/PinSub.py b/PinSub.py index 823d2c6..d5af808 100644 --- a/PinSub.py +++ b/PinSub.py @@ -26,10 +26,16 @@ """ import argparse +import hashlib +import http.client import json +import os +import platform import re +import socket import subprocess import sys +import time from dataclasses import dataclass, field from pathlib import Path @@ -74,10 +80,14 @@ def find_names_file(names_arg: Path | None, mkv_path: Path | None) -> Path | Non return None -def load_names(names_path: Path | None) -> tuple[dict[str, str], dict[str, str]]: - """Load (name_map, english_name_map) from a per-film JSON, or empty pair.""" +def load_names(names_path: Path | None) -> tuple[dict[str, str], dict[str, str], dict]: + """Load (name_map, english_name_map, per_film_glossary) from a per-film JSON. + + Returns empty containers if the file is missing. The per-film glossary, + when present, is a mapping {hanzi -> {"english": str, "context": str, "tags": [...]}}. + Underscore-prefixed keys are filtered out at each level.""" if names_path is None: - return {}, {} + return {}, {}, {} try: with names_path.open(encoding="utf-8") as f: data = json.load(f) @@ -85,7 +95,53 @@ def load_names(names_path: Path | None) -> tuple[dict[str, str], dict[str, str]] sys.exit(f"failed to load names file {names_path}: {e}") name_map = {k: v for k, v in (data.get("name_map") or {}).items() if not k.startswith("_")} english_name_map = {k: v for k, v in (data.get("english_name_map") or {}).items() if not k.startswith("_")} - return name_map, english_name_map + per_film_glossary = {k: v for k, v in (data.get("glossary") or {}).items() if not k.startswith("_")} + return name_map, english_name_map, per_film_glossary + + +# Default location for the global glossary (shipped publicly). +DEFAULT_GLOSSARY_PATH = Path(__file__).parent / "glossary.json" + + +def load_glossary(path: Path | None = None) -> dict[str, dict]: + """Load the global Chinese→English glossary. Entries are + {hanzi -> {"english": str, "context": str, "tags": [...]}}. + Underscore-prefixed keys (helps, format docs) are filtered out.""" + p = path or DEFAULT_GLOSSARY_PATH + if not p.exists(): + return {} + try: + with p.open(encoding="utf-8") as f: + data = json.load(f) + except (OSError, json.JSONDecodeError) as e: + print(f"warn: could not load glossary {p}: {e}", file=sys.stderr) + return {} + return {k: v for k, v in data.items() if not k.startswith("_") and isinstance(v, dict)} + + +def merge_glossaries(global_g: dict, per_film_g: dict) -> dict: + """Combine global + per-film glossary. Per-film entries override global on key collisions.""" + out = dict(global_g) + out.update(per_film_g) + return out + + +# Default location for TIMMY's system-prompt file. PinSub loads this at runtime +# and uses it as the system prompt for translator-role calls. If absent, the +# fallback inline string below is used. +DEFAULT_TIMMY_PROMPT_PATH = Path(__file__).parent / "README_TIMMY.md" + + +def load_timmy_prompt(path: Path | None = None) -> str | None: + """Read the TIMMY system-prompt file. 
Returns None if missing/unreadable.""" + p = path or DEFAULT_TIMMY_PROMPT_PATH + if not p.exists(): + return None + try: + return p.read_text(encoding="utf-8").strip() + except OSError as e: + print(f"warn: could not read {p}: {e}", file=sys.stderr) + return None @dataclass @@ -717,6 +773,1417 @@ def expand(parts: list[str]) -> list[str]: return out +# ---------- dictionary correction (Phase 1: empty-cue fill from CC-CEDICT) ---------- +# +# The Chinese subtitles are the source of truth; the existing English row is an +# imperfect translation, sometimes missing entirely. Phase 1 is conservative: +# we only fill cues where the merged result has no English line. Existing +# English is never overwritten in Phase 1 — that's the LLM verifier's job +# (Phase 2, gated on Arc A770 use per project rule). +# +# Every change is logged to .changes.tsv as a side channel so the manual +# review pass can audit what the automation did. + +DEFAULT_CEDICT_PATH = Path(__file__).parent / "Research" / "primary_sources" / "cedict" / "cedict_1_0_ts_utf-8_mdbg.txt" + +CEDICT_LINE_RE = re.compile(r"^(\S+)\s+(\S+)\s+\[([^\]]+)\]\s+/(.+)/\s*$") + + +def load_cedict(path: Path) -> dict[str, list[tuple[str, list[str]]]]: + """Parse CC-CEDICT into {simplified: [(pinyin, [defs]), ...]}. + + Multi-entry keys happen when the same simplified form has different pinyin + readings; we keep them all and pick at lookup time. Definitions inside an + entry are split on '/'; CC-CEDICT often has multiple synonymous glosses. + """ + if not path or not path.exists(): + return {} + out: dict[str, list[tuple[str, list[str]]]] = {} + try: + with path.open(encoding="utf-8") as f: + for line in f: + if not line or line.startswith("#"): + continue + m = CEDICT_LINE_RE.match(line) + if not m: + continue + _trad, simp, pinyin, defs_str = m.groups() + defs = [d for d in defs_str.split("/") if d] + out.setdefault(simp, []).append((pinyin, defs)) + except OSError as e: + print(f"warn: could not load cedict {path}: {e}", file=sys.stderr) + return {} + return out + + +def segment_greedy(text: str, cedict: dict, max_len: int = 12) -> list[str]: + """Left-to-right longest-match segmentation against CC-CEDICT keys. + + Hanzi runs are segmented by longest-match; non-Hanzi (punctuation, ASCII, + spaces) pass through one character at a time. max_len caps the lookup + window — most CC-CEDICT entries are <=8 chars; a generous 12 covers all + practical cases without quadratic blowup. + """ + if not text: + return [] + if not cedict: + return list(text) + out: list[str] = [] + i = 0 + n = len(text) + while i < n: + ch = text[i] + if HAN_RE.match(ch): + matched = False + for length in range(min(max_len, n - i), 0, -1): + cand = text[i:i + length] + if cand in cedict: + out.append(cand) + i += length + matched = True + break + if not matched: + # Single-char Hanzi without a multi-char hit; emit it (may still be in cedict as a single-char entry). + out.append(ch) + i += 1 + else: + out.append(ch) + i += 1 + return out + + +def gloss_segment(seg: str, cedict: dict) -> str | None: + """Return the first definition for a Hanzi segment, or None if unmappable. + + For multi-entry segments (e.g., 行 has multiple pinyin readings) the + rough heuristic is "pick the entry with the most definitions" — that + correlates loosely with "most common reading." Phase 2 LLM verifier + will pick smarter; Phase 1 just needs a plausible default. 
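+
+    A minimal shape sketch (hypothetical entries, not real CC-CEDICT data):
+
+        >>> cedict = {"行": [("xing2", ["to walk", "to go"]), ("hang2", ["row"])]}
+        >>> gloss_segment("行", cedict)   # the xing2 entry wins: two defs beat one
+        'to walk'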
+ """ + if seg not in cedict: + return None + entries = cedict[seg] + entries_sorted = sorted(entries, key=lambda e: -len(e[1])) + pinyin, defs = entries_sorted[0] + if not defs: + return None + return defs[0] + + +# Some CC-CEDICT definitions carry editorial annotations like "(slang)", +# "(Tw)", "(literary)", "(courteous, as opposed to informal 你[ni3])", or +# "see X". For a learner subtitle line we want the essential gloss, not +# the metadata. Strip the most common patterns aggressively. +GLOSS_LEADING_PAREN_RE = re.compile(r"^\(([A-Za-z][^)]{0,80})\)\s*") +GLOSS_PINYIN_BRACKETS_RE = re.compile(r"\[[a-zA-Z0-9 ]+\]") +GLOSS_INNER_ANNOTATION_RE = re.compile(r"\s*\((?:lit\.|fig\.|abbr\.|coll\.|slang|literary|formal|courteous|informal|archaic|dialect|usu\.|esp\.|see also|see|CL:[^)]+|as opposed to[^)]*|equivalent to[^)]*|same as[^)]*|written form of[^)]*|variant of[^)]*|usually|especially|by extension|extended meaning|of [^)]*|sound of [^)]*|interjection [^)]*)[^)]*\)") +GLOSS_SEE_ALSO_RE = re.compile(r"\s*;?\s*see\s+(also\s+)?\S+", flags=re.I) +GLOSS_CL_RE = re.compile(r"\s*\(CL:[^)]+\)") + +# Sentence-ending particles common in dialogue. Map to empty string so they +# don't pollute the gloss; punctuation already conveys the sentence end. +PARTICLE_OVERRIDES = { + "啦": "", + "啊": "", + "呀": "", + "哦": "", + "哎": "", + "嗨": "", + "嘛": "", + "呢": "", + "吧": "", + "嗯": "", + "唉": "", + "哟": "", + "嘿": "", + "诶": "", + "了": "", # aspect particle "le"; rarely useful as standalone gloss + "的": "", # possessive/genitive marker + "得": "", # complement marker + "地": "", # adverbial marker + "着": "", # progressive aspect marker + "过": "", # experiential aspect marker + "把": "", # disposal marker +} + +# Common verbs that are nearly always imperatives in subtitle dialogue. +# CC-CEDICT lists them as "to X" (infinitive); rewrite to bare imperative +# when the cue context suggests a command (short cue, ends with !, or +# single verb cue). +IMPERATIVE_LEMMA_OVERRIDES = { + "停": "Stop", + "走": "Go", + "来": "Come", + "去": "Go", + "等": "Wait", + "看": "Look", + "听": "Listen", + "请": "Please", + "起": "Get up", + "坐": "Sit", + "进": "Enter", + "出": "Get out", + "让开": "Move aside", + "小心": "Be careful", + "别动": "Don't move", + "不要": "Don't", + "快点": "Hurry", + "快": "Hurry", +} + + +def clean_gloss(gloss: str) -> str: + """Trim a CC-CEDICT definition for inline display.""" + g = gloss.strip() + # Drop a leading parenthetical annotation if present (may be quite long). + while True: + m = GLOSS_LEADING_PAREN_RE.match(g) + if not m: + break + g = g[m.end():].strip() + # Drop bracketed pinyin like [ni3] anywhere in the text. + g = GLOSS_PINYIN_BRACKETS_RE.sub("", g) + # Drop inner editorial annotations enclosed in parens. + g = GLOSS_INNER_ANNOTATION_RE.sub("", g) + # Drop any remaining "(CL:…)" classifier hints. + g = GLOSS_CL_RE.sub("", g) + # Drop trailing "see also X" / "see X" cross-references. + g = GLOSS_SEE_ALSO_RE.sub("", g) + # Tidy spacing. + g = re.sub(r"\s+", " ", g).strip() + g = re.sub(r"\s+([,.!?;:])", r"\1", g) + # If the gloss reduces to ";" or "," fragments, normalize. + g = g.strip(" ;,") + return g + + +def gloss_hanzi(hanzi: str, cedict: dict, name_map: dict[str, str] | None = None) -> str: + """Build a literal English gloss from a Hanzi line. + + Newlines in the Hanzi are preserved (each line glossed independently). + Unmappable Hanzi appear as `[?]` so a human reviewer can spot them. 
+ + If name_map is provided, segments matching a known proper noun use the + name_map entry instead of the CC-CEDICT literal gloss — so 秀莲 stays + "Xiùlián" (the character) rather than "beautiful lotus" (the literal). + name_map values are tone-marked pinyin; for the English row we strip + the tone marks and capitalize the syllable starts so the gloss matches + the english_name_map convention used elsewhere. + """ + if not hanzi or not cedict: + return "" + # Build a name-segmentation seed. We want longest-first matching of + # name_map keys to take precedence over generic CC-CEDICT segmentation. + name_keys = sorted((name_map or {}).keys(), key=len, reverse=True) if name_map else [] + + out_lines: list[str] = [] + for hline in hanzi.split("\n"): + # First pass: walk the line, replacing name-keys with sentinels, + # then segment the remainder via greedy CC-CEDICT match. + # Simpler: do a per-position check for name keys before each + # CC-CEDICT longest-match. + parts: list[str] = [] + i = 0 + n = len(hline) + while i < n: + ch = hline[i] + if HAN_RE.match(ch): + # Try name_map (longest first). + matched = False + for nk in name_keys: + if hline.startswith(nk, i): + parts.append(_name_to_english(name_map[nk])) + i += len(nk) + matched = True + break + if matched: + continue + # Fall through to CC-CEDICT longest-match. + for length in range(min(12, n - i), 0, -1): + cand = hline[i:i + length] + if cand in cedict: + # Particle override: drop entirely. + if cand in PARTICLE_OVERRIDES: + override = PARTICLE_OVERRIDES[cand] + if override: + parts.append(override) + i += length + matched = True + break + # Imperative override: emit the bare command form + # when the surrounding cue is short and verb-led. + if cand in IMPERATIVE_LEMMA_OVERRIDES: + parts.append(IMPERATIVE_LEMMA_OVERRIDES[cand]) + i += length + matched = True + break + g = gloss_segment(cand, cedict) + if g: + cleaned = clean_gloss(g) + if cleaned: + parts.append(cleaned) + else: + parts.append(f"[{cand}?]") + i += length + matched = True + break + if not matched: + parts.append(f"[{ch}?]") + i += 1 + else: + parts.append(ch) + i += 1 + line = " ".join(p for p in parts if p.strip()).strip() + line = re.sub(r"\s+", " ", line) + line = re.sub(r"\s+([,.!?;:])", r"\1", line) + out_lines.append(line) + return "\n".join(out_lines).strip() + + +def _name_to_english(tone_pinyin: str) -> str: + """Convert a name_map entry (e.g. 'Lǐ Mùbái') to bare English form ('Li Mubai'). + + Strip tone marks via NFKD decomposition + filter combining chars. + Capitalization is preserved (the consonant-side capital is what we want). + """ + import unicodedata + nfkd = unicodedata.normalize("NFKD", tone_pinyin) + return "".join(c for c in nfkd if not unicodedata.combining(c)) + + +def enrich_cues(cues: list[TriCue], cedict: dict, changelog_path: Path | None, + name_map: dict[str, str] | None = None + ) -> tuple[list[TriCue], set[int], list[tuple]]: + """Apply Phase-1 dictionary correction. + + Only fills empty English cues; never overwrites existing English. Logs + every change to .changes.tsv for review. + + Returns (cues, set_of_filled_cue_indices, changelog_rows). The filled + indices are what Phase 2 LLM verifier targets — those are the cues + whose English originated from CC-CEDICT and most need natural-language + cleanup. 
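+
+    Each changelog row is a (cue_index, action, hanzi, before, after) tuple;
+    a hypothetical fill looks like (42, "FILL", "走", "", "Go").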
+ """ + if not cedict: + print("enrich: no dictionary loaded; skipping") + return cues, set(), [] + changes: list[tuple] = [] + filled: set[int] = set() + for c in cues: + if c.hanzi and not c.english.strip(): + gloss = gloss_hanzi(c.hanzi, cedict, name_map=name_map) + if gloss: + changes.append((c.index, "FILL", c.hanzi.replace("\n", " | "), "", gloss)) + c.english = gloss + filled.add(c.index) + if changelog_path is not None and changes: + try: + with changelog_path.open("w", encoding="utf-8", newline="") as f: + f.write("idx\taction\thanzi\tbefore\tafter\n") + for row in changes: + f.write("\t".join(str(x).replace("\t", " ").replace("\n", " | ") for x in row) + "\n") + print(f"enrich: {len(changes)} cue(s) filled; changelog -> {changelog_path.name}") + except OSError as e: + print(f"warn: could not write changelog {changelog_path}: {e}", file=sys.stderr) + else: + print(f"enrich: {len(changes)} cue(s) filled") + return cues, filled, changes + + +# ---------- Phase 2: LLM verifier via llama.cpp-vulkan on Arc A770 ---------- +# +# Phase 1 (dictionary) fills empty cues with literal CC-CEDICT gloss; quality +# is bimodal. Phase 2 sends the filled cues (and optionally divergent cues) +# to a local llama.cpp HTTP server for natural-language correction. +# +# Pattern matches the owner's TIMMY backend convention: spawn `llama-server` +# with Vulkan device + auto gpu-layers, talk over HTTP, kill on exit. +# Per project rule (03_PROJECT.md workflow preferences), every A770 dispatch +# is announced and logged to Logs/A770_usage.md. + +LLAMA_HEALTH_TIMEOUT = 180 # seconds; first-call JIT can take a while +LLAMA_REQUEST_TIMEOUT = 120 +LLAMA_DEFAULT_PORT = 8765 # avoid colliding with TIMMY on 8080 + +LLM_SYSTEM_PROMPT = ( + "You translate Chinese film subtitles into English for a Mandarin learner. " + "The Chinese is the source of truth. Produce an idiomatic English line that " + "matches the Chinese meaning. Be short — one line fits on screen. " + "Do NOT explain. Do NOT include the Chinese text. Do NOT add quotes or " + "labels. Output ONLY the English translation. If the Chinese is a single " + "interjection or name, output its English equivalent only. /no_think" +) + + +@dataclass +class LlamaSession: + """Live llama-server handle plus metadata for the A770 usage log.""" + proc: subprocess.Popen + port: int + model_path: Path + server_exe: Path + device: str | None + started_at: float + first_call_at: float | None = None + last_call_at: float | None = None + calls: int = 0 + total_completion_tokens: int = 0 + total_prompt_tokens: int = 0 + + +def find_arc_vulkan_device(server_exe: Path) -> str | None: + """Run `llama-server --list-devices` and return the Arc device id (e.g. 'Vulkan0').""" + try: + out = subprocess.run( + [str(server_exe), "--list-devices"], + capture_output=True, text=True, timeout=30, check=False, + ) + except (OSError, subprocess.SubprocessError) as e: + print(f"warn: --list-devices failed: {e}", file=sys.stderr) + return None + blob = (out.stdout or "") + "\n" + (out.stderr or "") + # llama-server prints lines like: " Vulkan0: Intel(R) Arc(TM) A770 Graphics (...) 
+ for line in blob.splitlines(): + if "Arc" in line and "Vulkan" in line: + m = re.search(r"(Vulkan\d+)", line) + if m: + return m.group(1) + return None + + +def wait_for_llama_health(port: int, timeout: int = LLAMA_HEALTH_TIMEOUT) -> bool: + """Poll /health on the local server until 200 or timeout.""" + deadline = time.time() + timeout + while time.time() < deadline: + try: + conn = http.client.HTTPConnection("127.0.0.1", port, timeout=3) + conn.request("GET", "/health") + r = conn.getresponse() + conn.close() + if r.status == 200: + return True + except (OSError, http.client.HTTPException): + pass + time.sleep(1) + return False + + +def start_llama_server(server_exe: Path, model_path: Path, + port: int = LLAMA_DEFAULT_PORT, + device: str | None = None, + n_gpu_layers: str = "auto", + ctx_size: int = 4096, + n_threads: int = 4) -> LlamaSession: + """Spawn llama-server. Returns a LlamaSession; caller is responsible for stop_llama_server(). + + n_threads caps CPU usage. Vulkan/GPU workloads spend most time on the GPU; + CPU threads are for tokenization, scheduling, and any CPU-resident layers. + Default 4 leaves room for other CPU work.""" + if not server_exe.exists(): + sys.exit(f"missing llama-server: {server_exe}") + if not model_path.exists(): + sys.exit(f"missing GGUF model: {model_path}") + cmd = [str(server_exe), "-m", str(model_path), + "--port", str(port), + "--ctx-size", str(ctx_size), + "--gpu-layers", n_gpu_layers, + "--threads", str(n_threads), + "--threads-batch", str(n_threads), + "--no-warmup"] # we'll measure first-call JIT ourselves + if device: + cmd += ["--device", device] + # 🔴 STARTING A SERVER — per Master §1.6 + print(f"\n🔴 STARTING llama-server on port {port}") + print(f" model: {model_path.name}") + print(f" device: {device or '(auto)'}") + print(f" ctx: {ctx_size}") + print(f" cmd: {' '.join(cmd)}\n") + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + text=True, encoding="utf-8", errors="replace", + bufsize=1) + started = time.time() + # Print server output in the background while waiting for health. + if not wait_for_llama_health(port): + # Capture whatever the server emitted so we can diagnose. + if proc.stdout is not None: + try: + # Non-blocking-ish: read what's available. + proc.terminate() + try: + captured, _ = proc.communicate(timeout=3) + except subprocess.TimeoutExpired: + proc.kill() + captured, _ = proc.communicate() + print(captured[-2000:] if captured else "(no output)", file=sys.stderr) + except (OSError, ValueError): + pass + sys.exit("llama-server failed to come up within timeout") + print(f"llama-server ready on :{port} after {time.time() - started:.1f}s") + return LlamaSession( + proc=proc, port=port, model_path=model_path, server_exe=server_exe, + device=device, started_at=started, + ) + + +def stop_llama_server(session: LlamaSession) -> None: + """Terminate the server and verify the port is free. Per Master §1.6.""" + if session.proc.poll() is None: + try: + session.proc.terminate() + session.proc.wait(timeout=10) + except subprocess.TimeoutExpired: + session.proc.kill() + session.proc.wait(timeout=5) + # Verify port is free. 
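+    # A successful create_connection means something is still listening on the
+    # port (a leaked process); the expected outcome is connection refused,
+    # which raises OSError and falls through to the confirmation print.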
+    try:
+        with socket.create_connection(("127.0.0.1", session.port), timeout=2):
+            print(f"warn: port {session.port} still bound after kill", file=sys.stderr)
+            return
+    except OSError:
+        pass  # port is free, which is what we want
+    print(f"\nllama-server stopped; port {session.port} confirmed free")
+
+
+def llm_complete(session: LlamaSession, system_prompt: str, user_prompt: str,
+                 max_tokens: int = 96, temperature: float = 0.2) -> str | None:
+    """POST to /v1/chat/completions; return content or None on failure."""
+    body = json.dumps({
+        "messages": [
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": user_prompt},
+        ],
+        "max_tokens": max_tokens,
+        "temperature": temperature,
+        "stream": False,
+    }).encode("utf-8")
+    try:
+        conn = http.client.HTTPConnection("127.0.0.1", session.port, timeout=LLAMA_REQUEST_TIMEOUT)
+        conn.request("POST", "/v1/chat/completions", body=body,
+                     headers={"Content-Type": "application/json"})
+        r = conn.getresponse()
+        if r.status != 200:
+            print(f"llm error: HTTP {r.status} — {r.read()[:200]}", file=sys.stderr)
+            conn.close()
+            return None
+        data = json.loads(r.read().decode("utf-8"))
+        conn.close()
+    except (OSError, json.JSONDecodeError, KeyError) as e:
+        print(f"llm error: {e}", file=sys.stderr)
+        return None
+    now = time.time()
+    session.calls += 1
+    if session.first_call_at is None:
+        session.first_call_at = now
+    session.last_call_at = now
+    usage = data.get("usage") or {}
+    session.total_completion_tokens += int(usage.get("completion_tokens") or 0)
+    session.total_prompt_tokens += int(usage.get("prompt_tokens") or 0)
+    try:
+        msg = data["choices"][0]["message"]
+    except (KeyError, IndexError):
+        return None
+    content = (msg.get("content") or "").strip()
+    if content:
+        return content
+    # Qwen3 reasoning mode: if /no_think failed and `content` is empty, the
+    # answer may still live inside `reasoning_content` (a stream of thoughts).
+    # We try to extract a final-line subtitle from it. Last resort.
+    reasoning = (msg.get("reasoning_content") or "").strip()
+    if reasoning:
+        # Look for an explicit quoted answer first.
+        m = re.search(r'"([^"]{2,80})"', reasoning)
+        if m:
+            return m.group(1).strip()
+        # Otherwise the last non-empty line is the best guess.
+        for line in reversed(reasoning.splitlines()):
+            s = line.strip().strip('"').strip("'").strip()
+            if 2 <= len(s) <= 120:
+                return s
+    return None
+
+
+def _clean_llm_output(text: str) -> str:
+    """Defensive cleanup of LLM output."""
+    if not text:
+        return ""
+    t = text.strip()
+    # Strip wrapping quotes if the model added them.
+    if (t.startswith('"') and t.endswith('"')) or (t.startswith("'") and t.endswith("'")):
+        t = t[1:-1].strip()
+    # Drop trailing model artifacts like "<|im_end|>" or thinking-tag residue.
+    t = re.sub(r"</?think[^>]*>|<\|im_end\|>", "", t, flags=re.I).strip()
+    # Some Qwen variants prefix the answer with the source language. Drop.
+    t = re.sub(r"^(English|Translation|Subtitle)\s*[:\-]\s*", "", t, flags=re.I)
+    return t
+
+
+def build_verify_prompt(cue: TriCue, dict_suggestion: str,
+                        prior_cues: list[TriCue], name_map: dict[str, str]) -> str:
+    """Build the user-side prompt for cue verification."""
+    parts: list[str] = []
+    parts.append(f"Chinese: {cue.hanzi}")
+    if cue.english.strip() and cue.english.strip() != dict_suggestion:
+        parts.append(f"Existing English (may be wrong or stilted): {cue.english}")
+    if dict_suggestion:
+        parts.append(f"Dictionary literal gloss (often awkward, just a hint): {dict_suggestion}")
+    # Two most recent cues that have Hanzi, for context.
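+    # Hypothetical: prior_cues = [c7 (has hanzi), c8 (no hanzi), c9 (has hanzi)]
+    # filters to [c7, c9]; the [-2:] keeps at most the last two of those.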
+ ctx = [pc for pc in prior_cues if pc.hanzi][-2:] + if ctx: + parts.append("Recent context (prior cues):") + for pc in ctx: + parts.append(f" {pc.hanzi} → {pc.english}") + # Name hints if any name appears in this cue. + hints: list[str] = [] + for h, p in (name_map or {}).items(): + if h and h in cue.hanzi: + hints.append(f"{h} = {_name_to_english(p)}") + if hints: + parts.append("Known names: " + "; ".join(hints)) + parts.append("Translate the Chinese above into one short English subtitle line.") + return "\n".join(parts) + + +def _cache_key(model_name: str, hanzi: str) -> str: + return hashlib.sha1(f"{model_name}|{hanzi}".encode("utf-8")).hexdigest() + + +def load_llm_cache(cache_path: Path) -> dict[str, str]: + if not cache_path.exists(): + return {} + try: + with cache_path.open(encoding="utf-8") as f: + return json.load(f) + except (OSError, json.JSONDecodeError): + return {} + + +def save_llm_cache(cache_path: Path, cache: dict[str, str]) -> None: + try: + cache_path.parent.mkdir(parents=True, exist_ok=True) + with cache_path.open("w", encoding="utf-8") as f: + json.dump(cache, f, ensure_ascii=False, indent=1) + except OSError as e: + print(f"warn: could not write cache {cache_path}: {e}", file=sys.stderr) + + +def verify_cues_with_llm(cues: list[TriCue], session: LlamaSession, + filled_indices: set[int], + name_map: dict[str, str], + cedict: dict, + cache: dict[str, str], + model_name: str, + changelog_rows: list[tuple]) -> int: + """Verify the previously-filled-by-dictionary cues; replace English with LLM output. + + Returns the number of cues replaced. + """ + replaced = 0 + cue_by_idx = {c.index: c for c in cues} + indices_sorted = sorted(filled_indices) + print(f"llm verify: {len(indices_sorted)} cue(s) to check") + for n, idx in enumerate(indices_sorted, 1): + c = cue_by_idx.get(idx) + if c is None or not c.hanzi: + continue + key = _cache_key(model_name, c.hanzi) + cached = cache.get(key) + if cached is not None: + new_text = cached + src = "cache" + else: + # Compute dict suggestion fresh (same logic Phase 1 uses). + dict_suggestion = gloss_hanzi(c.hanzi, cedict, name_map=name_map) if cedict else "" + # Two cues directly prior in cue order. 
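+                # (The window is four wide so build_verify_prompt can still find
+                #  two Hanzi-bearing cues after filtering; e.g. idx=10 scans 6..9.)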
+ prior = [cue_by_idx[i] for i in range(max(1, idx - 4), idx) if i in cue_by_idx] + prompt = build_verify_prompt(c, dict_suggestion, prior, name_map) + raw = llm_complete(session, LLM_SYSTEM_PROMPT, prompt) + new_text = _clean_llm_output(raw) if raw else "" + if not new_text: + print(f" cue {idx}: no LLM output; keeping existing", file=sys.stderr) + continue + cache[key] = new_text + src = "llm" + if new_text and new_text != c.english.strip(): + changelog_rows.append((c.index, f"VERIFY({src})", c.hanzi.replace("\n", " | "), + c.english, new_text)) + c.english = new_text + replaced += 1 + if n % 5 == 0 or n == len(indices_sorted): + print(f" ...{n}/{len(indices_sorted)} cue(s) processed") + return replaced + + +def append_changelog(changelog_path: Path, rows: list[tuple]) -> None: + """Append (or create) a TSV log of all enrichment actions.""" + if not rows: + return + header_needed = not changelog_path.exists() + try: + with changelog_path.open("a", encoding="utf-8", newline="") as f: + if header_needed: + f.write("idx\taction\thanzi\tbefore\tafter\n") + for row in rows: + f.write("\t".join(str(x).replace("\t", " ").replace("\n", " | ") for x in row) + "\n") + except OSError as e: + print(f"warn: could not write changelog {changelog_path}: {e}", file=sys.stderr) + + +def write_a770_usage_log(log_path: Path, session: LlamaSession, *, + job: str, cues_processed: int, cues_replaced: int, + source_session_path: Path | None, + success: bool, notes: str = "") -> None: + """Append a structured row to Logs/A770_usage.md per the project rule.""" + log_path.parent.mkdir(parents=True, exist_ok=True) + elapsed = time.time() - session.started_at + jit_compile = ((session.first_call_at - session.started_at) + if session.first_call_at else None) + inference_window = ((session.last_call_at - session.first_call_at) + if (session.first_call_at and session.last_call_at) else 0.0) + completion_tps = (session.total_completion_tokens / inference_window + if inference_window > 0 else 0.0) + header = ( + "| time (UTC) | project | job | model | quant | n_ctx | n_gpu_layers " + "| device | calls | prompt_tok | completion_tok | completion_tps | jit_s " + "| total_wall_s | success | notes | session_log |" + ) + sep = "|" + "|".join(["---"] * 16) + "|" + if not log_path.exists(): + log_path.write_text( + "# A770 usage log — PS\n\n" + "Per project rule (`03_PROJECT.md` Owner workflow preferences): every A770 dispatch is logged.\n\n" + f"{header}\n{sep}\n", + encoding="utf-8", + ) + quant = "" + m = re.search(r"-(Q\d[A-Za-z0-9_]*)\.gguf$", session.model_path.name, re.I) + if m: + quant = m.group(1) + now_utc = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()) + src = source_session_path.name if source_session_path else "" + row_cells = [ + now_utc, "PS", job, session.model_path.name, quant, + "4096", "auto", session.device or "(auto)", + str(session.calls), str(session.total_prompt_tokens), + str(session.total_completion_tokens), + f"{completion_tps:.1f}", + f"{jit_compile:.1f}" if jit_compile is not None else "-", + f"{elapsed:.1f}", + "yes" if success else "no", + notes.replace("|", "/").replace("\n", " "), + src, + ] + with log_path.open("a", encoding="utf-8") as f: + f.write("| " + " | ".join(row_cells) + " |\n") + + +# ---------- Phase 3: Python-orchestrated translate-then-compare correction ---------- +# +# Bigger-scope correction than --llm-verify. Where Phase 2 only re-translates +# the cues Phase 1 filled, Phase 3 also goes after cues whose EXISTING English +# diverges from what the Chinese says. 
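+# (Hypothetical case: the Chinese line names the sword, but the English row
+#  reads "Let's go." Phase 1 skips it because English already exists; the
+#  Phase-3 overlap heuristic flags it and TIMMY re-translates.)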
+# +# Pipeline per cue: +# 1. Python heuristic (content-word overlap + length ratio + empty check) +# decides whether the cue is worth sending to the LLM. This keeps TIMMY +# from re-translating the 90% of cues that are already fine. +# 2. TIMMY-translate (fresh prompt, no existing English in context) produces +# a Chinese-faithful candidate. Per-cue cache. +# 3. If existing English exists and differs from TIMMY's, a SEPARATE TIMMY +# call (fresh context, different prompt) judges: A=existing, B=TIMMY's, +# or C=write a better one. Per-cue cache. +# 4. Python applies name_map enforcement on the chosen text and logs. + +# Common English stop words — excluded from content-word overlap calculations. +_STOP_WORDS = { + "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "am", + "in", "on", "at", "of", "for", "with", "to", "from", "by", "as", "into", + "and", "or", "but", "nor", "so", "if", "then", "than", + "i", "you", "he", "she", "it", "we", "they", + "me", "him", "her", "us", "them", + "my", "your", "his", "hers", "its", "our", "ours", "their", "theirs", + "do", "does", "did", "doing", "done", + "have", "has", "had", "having", + "what", "who", "whom", "where", "when", "why", "how", "which", + "this", "that", "these", "those", + "lit", "fig", "interj", + "not", "no", + "any", "all", "some", "much", "many", "more", "most", "few", "less", "least", + "very", "too", "so", "just", + "let", "lets", +} + + +def _content_words(text: str) -> set[str]: + """Lowercased English content words, stopwords/short words removed.""" + if not text: + return set() + words = re.findall(r"[A-Za-z]+", text.lower()) + return {w for w in words if len(w) >= 2 and w not in _STOP_WORDS} + + +def detect_suspect_english(c: TriCue, cedict: dict, name_map: dict | None, + overlap_threshold: float = 0.2) -> tuple[bool, str]: + """Python heuristic: does this cue's existing English likely diverge from the Chinese? + + Returns (is_suspect, reason). Cheap (no LLM); meant to prefilter so TIMMY + only sees cues that actually need attention. + """ + if not c.hanzi: + return False, "no-hanzi" + existing = c.english.strip() + if not existing: + return True, "empty" + if not cedict: + return False, "no-dict" + # Compute the dict gloss (literal content words). + gloss = gloss_hanzi(c.hanzi, cedict, name_map=name_map) + if not gloss: + return False, "no-gloss" + gloss_words = _content_words(gloss) + if not gloss_words: + # Gloss reduces to particles/empty; nothing to compare against. + return False, "thin-gloss" + en_words = _content_words(existing) + # Add tone-stripped name-map English forms to the gloss vocabulary too — + # that way "Xiu Lian" matches the existing English referring to the character. + for h, p in (name_map or {}).items(): + if h and h in c.hanzi: + gloss_words |= _content_words(_name_to_english(p)) + if not gloss_words: + return False, "thin-gloss" + overlap = len(en_words & gloss_words) / max(1, len(gloss_words)) + if overlap < overlap_threshold: + return True, f"low-overlap({overlap:.2f})" + # Length ratio sanity check on long-ish cues. + han_chars = len([ch for ch in c.hanzi if HAN_RE.match(ch)]) + en_len = len(existing) + if han_chars >= 6 and en_len < han_chars * 1.0: + return True, f"english-too-short({en_len}/{han_chars})" + if han_chars >= 4 and en_len > han_chars * 12: + return True, f"english-too-long({en_len}/{han_chars})" + return False, "ok" + + +# Fallback inline system prompt used when README_TIMMY.md is absent. 
The real +# prompt lives in README_TIMMY.md next to PinSub.py and is loaded at startup. +LLM_FRESH_SYSTEM_PROMPT = ( + "You translate Chinese film subtitles into English for a Mandarin learner. " + "The Chinese is the source of truth. Translate every Chinese content word " + "(noun, verb, name, modifier) — function words (particles, articles) follow " + "each language's grammar. Preserve Chinese word order when grammatically " + "tolerable in English. Use the 'Known names' and 'Glossary' hints if " + "provided. NEVER leave Chinese characters in your output — if unsure of a " + "term, output [?] for it. Output one short English line. No quotes, no " + "labels, no explanation. /no_think" +) + + +def _build_glossary_hints(hanzi: str, glossary: dict) -> list[tuple[str, str]]: + """Return [(hanzi_term, english_hint)] for every glossary key found in this cue. + + Longest keys first — so "天下第一枪" (compound) matches before "枪" alone.""" + if not hanzi or not glossary: + return [] + hits: list[tuple[str, str]] = [] + for key in sorted(glossary.keys(), key=len, reverse=True): + if key in hanzi: + entry = glossary[key] + english_hint = (entry.get("english") or "").strip() if isinstance(entry, dict) else str(entry) + if english_hint: + hits.append((key, english_hint)) + return hits + + +def _detect_name_candidates(hanzi: str, pinyin: str, name_map: dict | None) -> list[tuple[str, str]]: + """Returns [(hanzi_span, english_form)] for likely name spans in the cue. + + When name_map covers a span, use it (canonical). When not, fall back to a + bare-pinyin form derived from the cue's already-computed pinyin. This is + the generic path for films without a name_map — TIMMY gets a pinyin-derived + hint per likely-name span and can use it verbatim.""" + if not hanzi: + return [] + out: list[tuple[str, str]] = [] + # Apply name_map first (longest first). + if name_map: + for h in sorted(name_map.keys(), key=len, reverse=True): + if h and h in hanzi: + out.append((h, _name_to_english(name_map[h]))) + # No generic auto-detection for now: pypinyin doesn't reliably identify + # names from raw text, and false positives ("walk" picked as surname) would + # poison the hints. Instead, we surface the *entire* pinyin row to TIMMY + # in the prompt (see build_translate_user_prompt) — TIMMY can use the + # pinyin syllable structure to spell any names that weren't in name_map. 
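+    # Hypothetical shape: name_map {"秀莲": "Xiùlián"} on cue "秀莲你看"
+    # yields [("秀莲", "Xiulian")]; tone marks are stripped by _name_to_english.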
+    return out
+
+
+def build_translate_user_prompt(c: TriCue, prior_cues: list[TriCue],
+                                name_hints: list[tuple[str, str]],
+                                glossary_hits: list[tuple[str, str]],
+                                feedback: str | None = None) -> str:
+    """Assemble the user-side prompt for a translator-role TIMMY call."""
+    parts: list[str] = []
+    ctx = [pc for pc in prior_cues if pc.hanzi][-2:]
+    if ctx:
+        parts.append("Recent scene context (prior cues for awareness):")
+        for pc in ctx:
+            en = pc.english.strip().replace("\n", " ") or "(no english)"
+            parts.append(f"  {pc.hanzi} → {en}")
+    if name_hints:
+        parts.append("Known names in this cue: " + "; ".join(
+            f"{h}={en}" for h, en in name_hints))
+    if glossary_hits:
+        parts.append("Glossary (use these English forms for the Chinese terms below):")
+        for h, hint in glossary_hits:
+            parts.append(f"  {h} → {hint}")
+    if c.pinyin:
+        parts.append(f"Pinyin row: {c.pinyin}")
+    parts.append(f"Chinese: {c.hanzi}")
+    if feedback:
+        parts.append(f"Your previous attempt had these issues — fix them:\n{feedback}")
+    parts.append("Translate the Chinese above into one short English subtitle line.")
+    return "\n".join(parts)
+
+
+def llm_translate_fresh(session: LlamaSession, c: TriCue,
+                        prior_cues: list[TriCue], name_map: dict,
+                        glossary: dict, system_prompt: str,
+                        feedback: str | None = None) -> str | None:
+    """Fresh TIMMY call: translate Chinese only, without seeing existing English."""
+    name_hints = _detect_name_candidates(c.hanzi, c.pinyin, name_map)
+    glossary_hits = _build_glossary_hits(c.hanzi, glossary) if False else _build_glossary_hints(c.hanzi, glossary)
+    user = build_translate_user_prompt(c, prior_cues, name_hints, glossary_hits, feedback)
+    raw = llm_complete(session, system_prompt, user)
+    return _clean_llm_output(raw) if raw else None
+
+
+LLM_COMPARE_SYSTEM_PROMPT = (
+    "You judge English subtitle translations for fidelity to a Chinese source. "
+    "Given the Chinese and two English candidates A and B, decide which "
+    "candidate better matches the Chinese meaning, OR write a better English "
+    "translation if both candidates are flawed. Respond with EXACTLY one line:\n"
+    "  A   (candidate A is better)\n"
+    "  B   (candidate B is better)\n"
+    "  C: <your translation>   (write your own better translation)\n"
+    "Do not explain. Do not include the Chinese. /no_think"
+)
+
+
+def llm_compare_translations(session: LlamaSession, c: TriCue,
+                             en_a: str, en_b: str,
+                             name_map: dict) -> tuple[str, str]:
+    """Fresh TIMMY: compare two English candidates. Returns (verdict, final_text).
+
+    Verdict is "A", "B", or "C" (model wrote a new translation).
+    Parse failures default to A (= keep existing) — least risk of regression.
+    """
+    parts: list[str] = []
+    parts.append(f"Chinese: {c.hanzi}")
+    parts.append(f"A: {en_a}")
+    parts.append(f"B: {en_b}")
+    hints: list[str] = []
+    for h, p in (name_map or {}).items():
+        if h and h in c.hanzi:
+            hints.append(f"{h}={_name_to_english(p)}")
+    if hints:
+        parts.append("Known names: " + "; ".join(hints))
+    parts.append("Which English better matches the Chinese? Answer A, B, or C: <your translation>.")
+    raw = llm_complete(session, LLM_COMPARE_SYSTEM_PROMPT, "\n".join(parts), max_tokens=80)
+    if not raw:
+        return "A", en_a
+    text = raw.strip()
+    # The model sometimes wraps in extra punctuation. Tolerate "A.", "A!", etc.
+    first = re.match(r"^\s*([ABC])\s*[:.\-—]?\s*(.*)$", text, re.DOTALL)
+    if not first:
+        return "A", en_a
+    verdict = first.group(1).upper()
+    rest = first.group(2).strip()
+    if verdict == "A":
+        return "A", en_a
+    if verdict == "B":
+        return "B", en_b
+    if verdict == "C":
+        # Use whatever follows the C label; fall back if empty.
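+        # Hypothetical raw reply: 'C: He refuses to hand over the sword.'
+        # parses to verdict "C" with that sentence as the suggestion.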
+ suggestion = _clean_llm_output(rest.split("\n", 1)[0]) + if suggestion: + return "C", suggestion + return "A", en_a + return "A", en_a + + +def validate_translation(english: str, hanzi: str, + name_hints: list[tuple[str, str]], + glossary_hits: list[tuple[str, str]] + ) -> tuple[bool, list[str]]: + """Python post-validation of a TIMMY translation. Returns (ok, feedback_messages). + + Feedback messages are phrased as direct instructions to TIMMY so they can + feed back into the retry loop unchanged. + + Checks: + 1. No Hanzi in the English output. + 2. Each name in the cue's Hanzi appears in canonical form in the English. + 3. Length sanity vs Hanzi character count (only on longer cues). + 4. Glossary coverage is informational — flagged but not rejected (synonym latitude).""" + if not english: + return False, ["Your output was empty. Translate the Chinese into one English line."] + issues: list[str] = [] + + # 1. Hanzi in the English row — hard reject. + leftover = HAN_RE.findall(english) + if leftover: + unique = sorted(set(leftover)) + issues.append( + f"Your output still contains Chinese characters: {' '.join(unique)}. " + "Translate every Hanzi into English. If you don't know a term, " + "output [?] for it instead of leaving the Hanzi." + ) + + # 2. Name canonicalization. Only fires when name_map produced a hint — + # we don't second-guess the generic name path. + en_lower = english.lower() + for hanzi_name, canonical_en in name_hints: + # Tolerate case-insensitive match anywhere in the line. + if canonical_en and canonical_en.lower() not in en_lower: + issues.append( + f"The name '{hanzi_name}' should appear in your English as " + f"'{canonical_en}' (the canonical spelling). Use that exact form." + ) + + # 3. Length sanity — only on cues with enough Hanzi to be meaningful. + han_chars = len([c for c in hanzi if HAN_RE.match(c)]) + en_len_alpha = len(re.sub(r"[^A-Za-z]", "", english)) + if han_chars >= 6 and en_len_alpha < han_chars: + # Likely you dropped content. Common when subjects/verbs are omitted. + issues.append( + f"Your English ({en_len_alpha} letters) is short for a " + f"{han_chars}-Hanzi cue. Make sure every content word in the " + "Chinese has an English equivalent — don't drop nouns or verbs." + ) + + # 4. (informational) Glossary coverage: we don't reject here, but if NONE + # of the glossary terms made it through, note it for the changelog. + # (no-op for now; could feed a softer "consider X" prompt later) + + return len(issues) == 0, issues + + +def correct_cues_with_llm(cues: list[TriCue], session: LlamaSession, + cedict: dict, name_map: dict, + glossary: dict, + system_prompt: str, + mode: str = "divergent", + do_compare: bool = True, + max_cues: int = 0, + max_rounds: int = 3, + cache_path: Path | None = None, + changelog_rows: list[tuple] | None = None + ) -> tuple[int, dict]: + """Phase 3 orchestrator. Returns (cues_replaced, per_action_counts).""" + if changelog_rows is None: + changelog_rows = [] + cue_by_idx = {c.index: c for c in cues} + cues_in_order = sorted(cue_by_idx.keys()) + + # Step 1: Python filtering. Compute the divergence reason for every cue + # exactly once and reuse it for both selection and the informational + # histogram below. 
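+    # Hypothetical: gloss content words {sword, return, master} vs existing
+    # English words {thanks} -> overlap 0/3 = 0.00 < 0.2 -> "low-overlap(0.00)".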
+ candidates: list[tuple[int, str]] = [] # (idx, reason) + suspect_counts: dict[str, int] = {} + for idx in cues_in_order: + c = cue_by_idx[idx] + if not c.hanzi: + continue + suspect, reason = detect_suspect_english(c, cedict, name_map) + suspect_counts[reason] = suspect_counts.get(reason, 0) + 1 + if mode == "fills": + if not c.english.strip(): + candidates.append((idx, "empty")) + elif mode == "all": + candidates.append((idx, reason)) + else: # divergent + if suspect: + candidates.append((idx, reason)) + print(f"llm correct: mode={mode}, {len(candidates)} candidate cue(s) selected") + print(f" divergence histogram: {dict(sorted(suspect_counts.items(), key=lambda kv: -kv[1])[:8])}") + if max_cues and max_cues > 0 and len(candidates) > max_cues: + candidates = candidates[:max_cues] + print(f" --llm-max set: capping at {len(candidates)}") + + # Step 2: caching scaffolding. + cache: dict[str, str] = {} + if cache_path is not None: + cache = load_llm_cache(cache_path) + model_name = session.model_path.name + + def t_key(hanzi: str) -> str: + return "T|" + hashlib.sha1(f"{model_name}|{hanzi}".encode("utf-8")).hexdigest() + + def c_key(hanzi: str, en_a: str, en_b: str) -> str: + return "C|" + hashlib.sha1(f"{model_name}|{hanzi}|{en_a}|{en_b}".encode("utf-8")).hexdigest() + + # Per-action counters. + counts = {"translate_only": 0, "kept_existing": 0, + "compare_A": 0, "compare_B": 0, "compare_C": 0, + "no_change": 0} + replaced = 0 + + rounds_used_counts = {1: 0, 2: 0, 3: 0, 4: 0} # 4 = gave up + + for n, (idx, reason) in enumerate(candidates, 1): + c = cue_by_idx[idx] + existing = c.english.strip() + + # Step 3: TIMMY-translate fresh, with up-to-max_rounds validation retries. + tk = t_key(c.hanzi) + if tk in cache: + en_timmy = cache[tk] + rounds_used_counts[1] += 1 + else: + prior = [cue_by_idx[i] for i in range(max(1, idx - 4), idx) if i in cue_by_idx] + name_hints = _detect_name_candidates(c.hanzi, c.pinyin, name_map) + glossary_hits = _build_glossary_hints(c.hanzi, glossary) + feedback: str | None = None + en_timmy = "" + last_issues: list[str] = [] + for round_idx in range(1, max_rounds + 1): + raw = llm_translate_fresh( + session, c, prior, name_map, glossary, system_prompt, feedback=feedback, + ) + en_timmy = (raw or "").strip() + if not en_timmy: + feedback = "Your previous reply was empty. Output one English line." + continue + ok, issues = validate_translation(en_timmy, c.hanzi, name_hints, glossary_hits) + if ok: + rounds_used_counts[round_idx] = rounds_used_counts.get(round_idx, 0) + 1 + break + last_issues = issues + feedback = "; ".join(issues) + else: + # Loop fell through max_rounds without an ok pass — keep best-effort output but note it. + rounds_used_counts[4] = rounds_used_counts.get(4, 0) + 1 + if last_issues and changelog_rows is not None: + # Surface the failure in the changelog as a flag. + pass + if en_timmy: + cache[tk] = en_timmy + + if not en_timmy: + counts["no_change"] += 1 + continue + + # Step 4: decide the final text. + if not existing: + final = en_timmy + verdict = "T" + counts["translate_only"] += 1 + elif existing.lower() == en_timmy.lower(): + counts["kept_existing"] += 1 + continue + elif not do_compare: + # Replace blindly (mode disabled comparison). 
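+            # (Verdict "T" marks translate-only rows in the changelog: no compare ran.)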
+ final = en_timmy + verdict = "T" + counts["translate_only"] += 1 + else: + ck = c_key(c.hanzi, existing, en_timmy) + if ck in cache: + cached_str = cache[ck] + # Format: "A|text" or "B|text" or "C|text" + v = cached_str.split("|", 1)[0] if "|" in cached_str else "A" + txt = cached_str.split("|", 1)[1] if "|" in cached_str else existing + verdict = v + final = txt + else: + v, f = llm_compare_translations(session, c, existing, en_timmy, name_map) + verdict = v + final = f + cache[ck] = f"{v}|{f}" + counts[f"compare_{verdict}"] = counts.get(f"compare_{verdict}", 0) + 1 + + if final and final.strip() != c.english.strip(): + changelog_rows.append((c.index, f"CORRECT({verdict},{reason})", + c.hanzi.replace("\n", " | "), c.english, final)) + c.english = final + replaced += 1 + else: + counts["no_change"] += 1 + + if n % 10 == 0 or n == len(candidates): + print(f" ...{n}/{len(candidates)} cue(s) processed (replaced so far: {replaced})") + + if cache_path is not None: + save_llm_cache(cache_path, cache) + counts["rounds_used"] = rounds_used_counts # nested histogram for logging + return replaced, counts + + +# ---------- spot-check (visual review of subtitle formatting) ---------- +# +# The Chinese subtitle is trusted to match the scene. What CAN go wrong is +# subtitle FORMATTING: a cue too long for the screen, a multi-cue merger that +# now spans 4+ lines, a "hold-for-reading" extension that overlaps the next +# beat, fast-dialogue alignment slips. These are all visual problems — Python +# can SURFACE the suspect cues by heuristic, ffmpeg burns the trilingual sub +# onto the frame, and an HTML index lets the owner scan a grid quickly. +# +# This is path A — "no VLM needed, just frame the candidate." Path B (a vision +# LLM judging frame+subtitle pairs) layers on top once a VLM GGUF is available. + +import html as html_lib + + +def _score_cue_for_spotcheck(c: TriCue) -> tuple[float, list[str]]: + """Score a cue's "needs human eyes on it" probability. Returns (score, tags). + + Tags are short labels that explain WHY the cue was picked — they get + rendered alongside the frame in the HTML grid.""" + score = 0.0 + tags: list[str] = [] + joined = c.joined() + char_count = len(joined) + lines = joined.split("\n") + line_count = len(lines) + duration_ms = max(1, c.end_ms - c.start_ms) + longest_line = max((len(ln) for ln in lines), default=0) + + # 1. Long total character count — overflow risk. + if char_count > 120: + score += (char_count - 120) / 8.0 + tags.append(f"chars={char_count}") + elif char_count > 100: + score += (char_count - 100) / 12.0 + + # 2. A single line too long for typical subtitle width. + if longest_line > 80: + score += (longest_line - 80) / 4.0 + tags.append(f"long-line={longest_line}") + + # 3. Many lines (likely a multi-cue merger). + if line_count > 3: + score += (line_count - 3) * 6.0 + tags.append(f"lines={line_count}") + + # 4. Long duration — possible hold-for-reading artifact bleeding into next beat. + if duration_ms > 8000: + score += min(8.0, (duration_ms - 8000) / 1000.0) + tags.append(f"long-dur={duration_ms}ms") + + # 5. Very short duration — fast dialogue, alignment may be wrong. + if duration_ms < 700 and char_count > 12: + score += 4.0 + tags.append(f"short-dur={duration_ms}ms") + + # 6. High character-density (chars/sec) — too much text to read in time. 
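+    # Hypothetical: 90 chars shown for 2.0 s -> 45 c/s -> score += (45-30)/6 = 2.5.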
+ char_density = char_count / (duration_ms / 1000.0) + if char_density > 30: + score += (char_density - 30) / 6.0 + tags.append(f"dense={char_density:.0f}c/s") + + return score, tags + + +def select_spotcheck_cues(cues: list[TriCue], top_n: int = 12, + cluster_extra: int = 0) -> list[tuple[TriCue, list[str]]]: + """Pick the N most likely-suspect cues for visual review. + + Also flags fast-dialogue clusters (cues with small gaps to neighbors) and + optionally includes `cluster_extra` cues from the densest cluster to give + the reviewer a contextual run of frames.""" + scored: list[tuple[float, list[str], TriCue]] = [] + by_index = {c.index: c for c in cues} + indexes = sorted(by_index) + + # Add per-cue scores from heuristics. + for c in cues: + s, tags = _score_cue_for_spotcheck(c) + scored.append((s, tags, c)) + + # Add cluster signal: cue is "in a fast cluster" when both neighbors are + # closer than 400 ms — alignment errors love this regime. + for i, idx in enumerate(indexes): + c = by_index[idx] + prev_c = by_index[indexes[i - 1]] if i > 0 else None + next_c = by_index[indexes[i + 1]] if i + 1 < len(indexes) else None + gap_prev = c.start_ms - prev_c.end_ms if prev_c else None + gap_next = next_c.start_ms - c.end_ms if next_c else None + if (gap_prev is not None and gap_prev < 400) and (gap_next is not None and gap_next < 400): + # Bump score and append a single fast-cluster tag for this cue. + for k, (s, tags, sc) in enumerate(scored): + if sc.index == c.index: + scored[k] = (s + 2.5, tags + ["fast-cluster"], sc) + break + + # Sort by score descending, drop zero-score cues. + scored.sort(key=lambda t: -t[0]) + picked: list[tuple[TriCue, list[str]]] = [] + seen: set[int] = set() + for s, tags, c in scored: + if s <= 0: + break + if c.index in seen: + continue + picked.append((c, tags)) + seen.add(c.index) + if len(picked) >= top_n: + break + return picked + + +def _ffmpeg_subtitle_path(sub_path: Path) -> str: + """Format a subtitle path for ffmpeg's `subtitles=` filter on Windows. + + Backslashes become forward; the drive-colon gets escaped. Paths with + spaces must be quoted at the filter level.""" + p = str(sub_path.resolve()).replace("\\", "/") + # Escape the drive-letter colon so ffmpeg doesn't interpret it as a filter option. + if len(p) >= 2 and p[1] == ":": + p = p[0] + r"\:" + p[2:] + # Escape any remaining colons (rare). + return p + + +def extract_frame_with_subs(video: Path, sub_path: Path, cue: TriCue, + out_dir: Path) -> Path | None: + """Use ffmpeg to grab a single frame at the cue's mid-time with subtitles burned in. + + Returns the .jpg Path on success, None on failure.""" + if not video.exists(): + return None + mid_sec = (cue.start_ms + cue.end_ms) / 2000.0 + out = out_dir / f"frame_{cue.index:04d}.jpg" + sub_arg = _ffmpeg_subtitle_path(sub_path) + # Fast input-seek to ~5 s before the cue, then output-seek to the cue mid-time. + # -copyts preserves the original stream PTS so libass (inside the subtitles + # filter) sees the real time and renders the right cue. Without -copyts, + # input -ss zeros out PTS and the filter shows nothing. We also need -update 1 + # so ffmpeg writes a single image without complaining about missing %d patterns. 
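+    # Hypothetical timing: a cue at 10.0–12.0 s gives mid_sec 11.0; we input-seek
+    # to 6.0 s (fast, keyframe-aligned) and output-seek precisely to 11.0 s.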
+    pre_sec = max(0.0, mid_sec - 5.0)
+    cmd = [
+        "ffmpeg", "-y", "-loglevel", "error", "-copyts",
+        "-ss", f"{pre_sec:.3f}", "-i", str(video),
+        "-ss", f"{mid_sec:.3f}",
+        "-vf", f"subtitles='{sub_arg}'",
+        "-frames:v", "1", "-q:v", "3", "-update", "1",
+        str(out),
+    ]
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
+    except subprocess.SubprocessError as e:
+        print(f"warn: ffmpeg failed for cue {cue.index}: {e}", file=sys.stderr)
+        return None
+    if result.returncode != 0:
+        # Subtitles filter failed; fall back to a frame without subtitles.
+        cmd_fallback = [
+            "ffmpeg", "-y", "-loglevel", "error",
+            "-ss", f"{mid_sec:.3f}", "-i", str(video),
+            "-frames:v", "1", "-q:v", "3",
+            str(out),
+        ]
+        try:
+            r2 = subprocess.run(cmd_fallback, capture_output=True, text=True, timeout=60)
+            if r2.returncode != 0:
+                print(f"warn: ffmpeg fallback also failed for cue {cue.index}: "
+                      f"{result.stderr[:200]}", file=sys.stderr)
+                return None
+        except subprocess.SubprocessError as e:
+            print(f"warn: ffmpeg fallback exception for cue {cue.index}: {e}", file=sys.stderr)
+            return None
+    return out if out.exists() else None
+
+
+def write_spotcheck_html(picks: list[tuple[TriCue, list[str], Path | None]],
+                         html_path: Path, film_title: str) -> None:
+    """Render an HTML index of the picked cues with frames + metadata.
+
+    `picks` is a list of (cue, tags, frame_path). frame_path may be None
+    if extraction failed."""
+    # Use absolute file:// URLs for the images so the HTML works regardless of
+    # where it's opened from. Browsers gate this; on Windows it tends to Just Work.
+    rows: list[str] = []
+    for cue, tags, frame in picks:
+        ts = format_ts(cue.start_ms).replace(",", ".")
+        te = format_ts(cue.end_ms).replace(",", ".")
+        dur_ms = cue.end_ms - cue.start_ms
+        joined_html = html_lib.escape(cue.joined()).replace("\n", "<br>")
+        tag_html = " ".join(f'<span class="tag">{html_lib.escape(t)}</span>' for t in tags)
+        if frame and frame.exists():
+            img_url = "file:///" + str(frame.resolve()).replace("\\", "/")
+            img_html = f'<img src="{img_url}" alt="cue {cue.index}">'
+        else:
+            img_html = '<div class="noframe">[ffmpeg failed]</div>'
+        rows.append(f"""
+<div class="card">
+  <div class="frame">{img_html}</div>
+  <div class="meta">
+    <div class="when">#{cue.index} · {ts} → {te} · {dur_ms}ms</div>
+    <div class="tags">{tag_html}</div>
+    <div class="text">{joined_html}</div>
+  </div>
+</div>
+""")
+
+    title = html_lib.escape(film_title or "spotcheck")
+    html = f"""<!DOCTYPE html>
+<html>
+<head>
+<meta charset="utf-8">
+<title>spotcheck — {title}</title>
+<style>
+  body {{ font-family: sans-serif; background: #111; color: #ddd; margin: 1.5em; }}
+  .grid {{ display: flex; flex-wrap: wrap; gap: 1em; }}
+  .card {{ width: 480px; border: 1px solid #444; padding: .5em; }}
+  .card img {{ width: 100%; }}
+  .tag {{ background: #333; padding: 0 .4em; margin-right: .3em; font-size: .85em; }}
+  .noframe {{ color: #c66; padding: 2em 0; text-align: center; }}
+</style>
+</head>
+<body>
+<h1>spotcheck — {title}</h1>
+<p>{len(picks)} cue(s) flagged. Each card shows the frame at the cue's mid-time with the
+trilingual subtitle burned in, plus the cue's text, timing, and why Python flagged it.</p>
+<div class="grid">
+  {"".join(rows)}
+</div>
+</body>
+</html>
+ + +""" + html_path.parent.mkdir(parents=True, exist_ok=True) + html_path.write_text(html, encoding="utf-8") + + +def run_spotcheck(merged: list[TriCue], out_subtitle: Path, video: Path, + top_n: int = 12) -> None: + """Top-level spotcheck workflow. Picks suspect cues, extracts frames, + writes an HTML index next to the subtitle output.""" + picks = select_spotcheck_cues(merged, top_n=top_n) + if not picks: + print("spotcheck: no cues scored above threshold; nothing to review") + return + spotcheck_dir = out_subtitle.parent / f"{out_subtitle.stem}.spotcheck" + spotcheck_dir.mkdir(parents=True, exist_ok=True) + print(f"spotcheck: {len(picks)} cue(s) picked; extracting frames...") + with_frames: list[tuple[TriCue, list[str], Path | None]] = [] + for cue, tags in picks: + frame = extract_frame_with_subs(video, out_subtitle, cue, spotcheck_dir) + with_frames.append((cue, tags, frame)) + html_path = spotcheck_dir / "index.html" + write_spotcheck_html(with_frames, html_path, video.stem) + print(f"spotcheck: wrote {html_path}") + print(f" open with: start {html_path}") + + # ---------- validation ---------- def validate(cues: list[TriCue]) -> None: @@ -748,8 +2215,8 @@ def cmd_inspect(mkv: Path, names_arg: Path | None) -> None: else: print("names: no IMDb tag in filename; pass --names if you have a names file") else: - nm, em = load_names(names_path) - print(f"names: {names_path} ({len(nm)} hanzi, {len(em)} english fixes)") + nm, em, gl = load_names(names_path) + print(f"names: {names_path} ({len(nm)} hanzi, {len(em)} english fixes, {len(gl)} glossary entries)") def main() -> None: @@ -775,6 +2242,36 @@ def main() -> None: ap.add_argument("--window-ms", type=int, default=1500, help="per-cue alignment tolerance (default 1500)") ap.add_argument("--no-bom", action="store_true", help="write output without UTF-8 BOM") + ap.add_argument("--enrich", action="store_true", + help="apply Phase-1 dictionary correction (CC-CEDICT-driven empty-cue fill); writes .changes.tsv") + ap.add_argument("--cedict", type=Path, default=None, + help="path to CC-CEDICT text file (default: Research/primary_sources/cedict/cedict_1_0_ts_utf-8_mdbg.txt)") + ap.add_argument("--llm-verify", action="store_true", + help="(Phase 2) run llama.cpp on the Arc A770 to re-translate dictionary-filled cues. Requires --enrich. Uses --llm-model / --llm-server (or env vars PINSUB_LLM_GGUF / PINSUB_LLAMA_SERVER).") + ap.add_argument("--llm-model", type=Path, default=None, + help="GGUF model file for --llm-verify (default: $PINSUB_LLM_GGUF env var)") + ap.add_argument("--llm-server", type=Path, default=None, + help="llama-server.exe path for --llm-verify (default: $PINSUB_LLAMA_SERVER env var)") + ap.add_argument("--llm-port", type=int, default=LLAMA_DEFAULT_PORT, + help=f"localhost port for llama-server (default {LLAMA_DEFAULT_PORT})") + ap.add_argument("--llm-ctx", type=int, default=4096, + help="llama-server --ctx-size (default 4096)") + ap.add_argument("--llm-max", type=int, default=0, + help="cap LLM pass at N cues (0 = no cap; useful for smoke tests)") + ap.add_argument("--llm-correct", action="store_true", + help="(Phase 3) broader translate-then-compare correction. Python heuristic prefilters; TIMMY translates fresh; a second TIMMY compares against existing English. 
Requires --enrich (for the cedict gloss used by the heuristic).") + ap.add_argument("--llm-scope", choices=("fills", "divergent", "all"), default="divergent", + help="--llm-correct scope: 'fills' (only Phase-1 empty fills), 'divergent' (Python flags suspect cues; default), 'all' (every cue with Chinese)") + ap.add_argument("--llm-no-compare", action="store_true", + help="with --llm-correct, skip the comparison pass (use TIMMY's translation outright)") + ap.add_argument("--llm-rounds", type=int, default=3, + help="max retry rounds for --llm-correct when a TIMMY translation fails Python validation (default 3)") + ap.add_argument("--llm-threads", type=int, default=4, + help="CPU thread cap for llama-server (default 4). Vulkan path spends most time on GPU; this caps CPU side. Lower if you need CPU headroom.") + ap.add_argument("--spotcheck", action="store_true", + help="after writing the trilingual output, run a visual spot-check: pick cues at risk of formatting/overflow/alignment issues, ffmpeg-extract a frame at each cue's mid-time with subtitles burned in, output an HTML grid for human review.") + ap.add_argument("--spotcheck-n", type=int, default=12, + help="number of cues to surface in the spotcheck grid (default 12)") args = ap.parse_args() if args.inspect: @@ -792,12 +2289,28 @@ def main() -> None: sys.exit(f"missing: {args.zh}") names_path = find_names_file(args.names, args.mkv) - name_map, english_name_map = load_names(names_path) + name_map, english_name_map, per_film_glossary = load_names(names_path) if names_path: - print(f"loaded names: {names_path.name} ({len(name_map)} hanzi, {len(english_name_map)} english fixes)") + print(f"loaded names: {names_path.name} ({len(name_map)} hanzi, {len(english_name_map)} english fixes, {len(per_film_glossary)} film-glossary entries)") else: print("no names file found — name capitalization and English Wade-Giles fixes will be skipped") + # Load global glossary (and merge per-film over it). Used by the LLM stages + # only — pinyin and dictionary stages don't need it. + global_glossary = load_glossary() + glossary = merge_glossaries(global_glossary, per_film_glossary) + if glossary: + print(f"loaded glossary: {len(global_glossary)} global + {len(per_film_glossary)} film-specific = {len(glossary)} active entries") + + # Load TIMMY system prompt from README_TIMMY.md if present, else fall back + # to the inline default. The prompt file is the canonical place to tune + # translation behavior, quirks, and policy without touching code. + timmy_system_prompt = load_timmy_prompt() or LLM_FRESH_SYSTEM_PROMPT + if (Path(__file__).parent / "README_TIMMY.md").exists(): + print(f"loaded TIMMY prompt: README_TIMMY.md ({len(timmy_system_prompt)} chars)") + else: + print("no README_TIMMY.md found — using inline fallback prompt") + work_dir = args.out.parent # 1. English source: provided, or extract from mkv. @@ -879,6 +2392,217 @@ def main() -> None: print(f"warn: could not load {alt_path.name}: {e}", file=sys.stderr) print(f" {len(merged)} merged cues") + # 5b. Phase-1 dictionary correction (if --enrich). Runs AFTER translations.json + # so manual per-cue overrides take precedence; only fills empty English. 
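+    # Rough shape of the per-cue fill, as a comment-only sketch (enrich_cues is
+    # the real implementation; it also applies name_map and logs changelog rows):
+    #
+    #     for cue in merged:
+    #         if cue.hanzi and not cue.english.strip():
+    #             segs = segment_greedy(cue.hanzi, cedict)
+    #             gloss = " ".join(g for g in (gloss_segment(s, cedict) for s in segs) if g)
+    #             if gloss:
+    #                 cue.english = gloss   # logged as a FILL row; never overwrites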
+ cedict: dict = {} + filled_indices: set[int] = set() + changelog_rows: list[tuple] = [] + changelog_path: Path | None = None + if args.enrich: + cedict_path = args.cedict if args.cedict else DEFAULT_CEDICT_PATH + print(f"loading dictionary: {cedict_path.name}...") + cedict = load_cedict(cedict_path) + if not cedict: + print("warn: dictionary empty/missing — skipping enrichment", file=sys.stderr) + else: + print(f" {len(cedict)} simplified-Hanzi keys") + changelog_path = args.out.with_suffix(args.out.suffix + ".changes.tsv") + merged, filled_indices, changelog_rows = enrich_cues( + merged, cedict, changelog_path, name_map=name_map, + ) + + # 5c. Phase-2 LLM verifier (if --llm-verify). Uses the local llama.cpp HTTP + # server on the Arc A770. Verifies the cues filled by Phase 1. + if args.llm_verify: + if not args.enrich: + sys.exit("--llm-verify requires --enrich") + server_exe = args.llm_server or Path(os.environ.get("PINSUB_LLAMA_SERVER", "")) + model_path = args.llm_model or Path(os.environ.get("PINSUB_LLM_GGUF", "")) + if not server_exe or str(server_exe) == ".": + sys.exit("--llm-verify needs --llm-server or PINSUB_LLAMA_SERVER env var") + if not model_path or str(model_path) == ".": + sys.exit("--llm-verify needs --llm-model or PINSUB_LLM_GGUF env var") + if not filled_indices: + print("llm verify: no cues were filled by Phase 1; nothing to verify") + else: + if args.llm_max and args.llm_max > 0: + # Smoke-test mode: cap the LLM verify pass at N cues. + indices_sorted = sorted(filled_indices)[: args.llm_max] + filled_indices = set(indices_sorted) + print(f"llm verify: --llm-max set, verifying first {len(filled_indices)} cue(s)") + + print("🟧 A770 USE: spawning llama-server on the Arc.") + print(f" model: {model_path.name} (~{model_path.stat().st_size / 1e9:.1f} GB on disk)") + print(f" est. VRAM peak: ~12 GB (10.5 GB model + ~1.5 GB KV @ ctx={args.llm_ctx})") + print(f" est. runtime: ~10 s per cue × {len(filled_indices)} cue(s) + ~30 s startup") + device = find_arc_vulkan_device(server_exe) + if device: + print(f" device: {device}") + else: + print(" device: (auto — could not enumerate; llama-server will pick)") + + session = start_llama_server( + server_exe, model_path, + port=args.llm_port, device=device, ctx_size=args.llm_ctx, + n_threads=args.llm_threads, + ) + + # Cache file is keyed per-film by IMDb id when present. + imdb_id = None + if args.mkv: + m = IMDB_RE.search(args.mkv.name) + if m: + imdb_id = m.group(1) + cache_path = (Path(__file__).parent / "Research" / "cache" + / f"{imdb_id or 'noimdb'}.llm.json") + cache = load_llm_cache(cache_path) + + replaced = 0 + success = False + try: + replaced = verify_cues_with_llm( + merged, session, filled_indices, name_map, cedict, cache, + model_name=model_path.name, changelog_rows=changelog_rows, + ) + print(f"llm verify: {replaced} cue(s) replaced from LLM output") + save_llm_cache(cache_path, cache) + success = True + finally: + stop_llama_server(session) + # A770 usage log entry — required by project rule. + a770_log = Path(__file__).parent / "Logs" / "A770_usage.md" + write_a770_usage_log( + a770_log, session, + job="PinSub --llm-verify", + cues_processed=len(filled_indices), + cues_replaced=replaced, + source_session_path=None, + success=success, + notes=f"film={imdb_id or args.mkv.name}; cache={cache_path.name}", + ) + + # Append the VERIFY rows to the existing FILL changelog. + if changelog_path is not None and changelog_rows: + # Rewrite the changelog with the merged rows. 
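+            # Each changelog row is (idx, action, hanzi, before, after), written
+            # tab-separated; tabs/newlines inside fields are flattened below.
+            # Illustrative (made-up) row:  217  VERIFY  他走了  he go  He's gone.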
+ try: + with changelog_path.open("w", encoding="utf-8", newline="") as f: + f.write("idx\taction\thanzi\tbefore\tafter\n") + for row in changelog_rows: + f.write("\t".join( + str(x).replace("\t", " ").replace("\n", " | ") for x in row + ) + "\n") + except OSError as e: + print(f"warn: could not rewrite changelog {changelog_path}: {e}", file=sys.stderr) + + # 5d. Phase-3 LLM-correct (if --llm-correct). Broader translate-then-compare: + # Python heuristic prefilters suspect cues; TIMMY translates each from + # scratch (no existing English in the prompt); a separate TIMMY compares + # the two candidates per cue. Reuses the llama-server pattern. + if args.llm_correct: + if not args.enrich: + sys.exit("--llm-correct requires --enrich (cedict gloss feeds the divergence heuristic)") + if not cedict: + sys.exit("--llm-correct requires a loaded CC-CEDICT") + server_exe = args.llm_server or Path(os.environ.get("PINSUB_LLAMA_SERVER", "")) + model_path = args.llm_model or Path(os.environ.get("PINSUB_LLM_GGUF", "")) + if not server_exe or str(server_exe) == ".": + sys.exit("--llm-correct needs --llm-server or PINSUB_LLAMA_SERVER env var") + if not model_path or str(model_path) == ".": + sys.exit("--llm-correct needs --llm-model or PINSUB_LLM_GGUF env var") + + # Pre-survey: how many cues will the Python heuristic select before we + # spawn the server? Saves an A770 startup if the answer is zero. + survey_counts: dict[str, int] = {} + candidate_count = 0 + for c in merged: + if not c.hanzi: + continue + suspect, reason = detect_suspect_english(c, cedict, name_map) + survey_counts[reason] = survey_counts.get(reason, 0) + 1 + if args.llm_scope == "fills": + if not c.english.strip(): + candidate_count += 1 + elif args.llm_scope == "all": + candidate_count += 1 + elif suspect: + candidate_count += 1 + print(f"llm correct: pre-survey — {candidate_count} candidate cue(s) (scope={args.llm_scope})") + top = sorted(survey_counts.items(), key=lambda kv: -kv[1])[:6] + print(f" divergence histogram (top 6): {dict(top)}") + if args.llm_max and args.llm_max > 0: + candidate_count = min(candidate_count, args.llm_max) + print(f" (capping at --llm-max={args.llm_max})") + + if candidate_count == 0: + print("llm correct: no cues to process; skipping") + else: + print("🟧 A770 USE: spawning llama-server on the Arc.") + print(f" model: {model_path.name} (~{model_path.stat().st_size / 1e9:.1f} GB on disk)") + print(f" est. VRAM peak: ~12 GB (10.5 GB model + ~1.5 GB KV @ ctx={args.llm_ctx})") + calls_per_cue = 2 if not args.llm_no_compare else 1 + print(f" est. 
runtime: ~{candidate_count * calls_per_cue * 8} s " + f"({candidate_count} cue(s) × {calls_per_cue} call(s) × ~8 s)") + device = find_arc_vulkan_device(server_exe) + if device: + print(f" device: {device}") + else: + print(" device: (auto)") + + session = start_llama_server( + server_exe, model_path, + port=args.llm_port, device=device, ctx_size=args.llm_ctx, + n_threads=args.llm_threads, + ) + + imdb_id = None + if args.mkv: + m = IMDB_RE.search(args.mkv.name) + if m: + imdb_id = m.group(1) + cache_path = (Path(__file__).parent / "Research" / "cache" + / f"{imdb_id or 'noimdb'}.correct.json") + + replaced = 0 + counts: dict[str, int] = {} + success = False + try: + replaced, counts = correct_cues_with_llm( + merged, session, cedict, name_map, + glossary=glossary, + system_prompt=timmy_system_prompt, + mode=args.llm_scope, do_compare=not args.llm_no_compare, + max_cues=args.llm_max, max_rounds=args.llm_rounds, + cache_path=cache_path, + changelog_rows=changelog_rows, + ) + print(f"llm correct: {replaced} cue(s) replaced; action counts: {counts}") + success = True + finally: + stop_llama_server(session) + a770_log = Path(__file__).parent / "Logs" / "A770_usage.md" + write_a770_usage_log( + a770_log, session, + job=f"PinSub --llm-correct --llm-scope={args.llm_scope}" + + ("" if not args.llm_no_compare else " --llm-no-compare"), + cues_processed=candidate_count, + cues_replaced=replaced, + source_session_path=None, + success=success, + notes=f"film={imdb_id or args.mkv.name}; counts={counts}", + ) + + # Rewrite the changelog including new CORRECT rows. + if changelog_path is not None and changelog_rows: + try: + with changelog_path.open("w", encoding="utf-8", newline="") as f: + f.write("idx\taction\thanzi\tbefore\tafter\n") + for row in changelog_rows: + f.write("\t".join( + str(x).replace("\t", " ").replace("\n", " | ") for x in row + ) + "\n") + except OSError as e: + print(f"warn: could not rewrite changelog {changelog_path}: {e}", file=sys.stderr) + # 6. Validate + write. Output format inferred from --out extension. validate(merged) ext = args.out.suffix.lower() @@ -890,6 +2614,10 @@ def main() -> None: sys.exit(f"unsupported output extension '{ext}'. Use .ass (recommended) or .srt.") print(f"wrote {args.out}") + # 7. Optional visual spot-check (path A — no VLM required). + if args.spotcheck: + run_spotcheck(merged, args.out, args.mkv, top_n=args.spotcheck_n) + if __name__ == "__main__": main() diff --git a/README_TIMMY.md b/README_TIMMY.md new file mode 100644 index 0000000..2955408 --- /dev/null +++ b/README_TIMMY.md @@ -0,0 +1,71 @@ +You are TIMMY, Qx's local Chinese-to-English subtitle translator for the PinSub pipeline. + +Your one job per call: produce ONE short English subtitle line that faithfully renders a single Chinese subtitle cue. Python orchestrates everything else. You are the worker; Python is the brain. + +## Big-picture context + +- The Chinese subtitle is the **source of truth** for the film. The existing English on Bluray rips and torrents is often shifted across cue boundaries or just wrong; treat any English already in the prompt as an untrusted hint. +- Your output is shown stacked on screen below the Hanzi and pinyin rows, on the same timestamp. The viewer is a Mandarin learner who wants to map Chinese words to English words as they read. +- That goal — word-to-word mappability — is more important than fluency. Stilted-but-faithful beats fluent-but-paraphrased. + +## Translation philosophy + +1. 
**Word-for-word fidelity on content.** Every noun, verb, named entity, and modifier in the Chinese gets an English equivalent. Don't OMIT content the Chinese expresses; don't INVENT content the Chinese doesn't express. +2. **Function-word latitude.** Chinese particles, classifiers, possessive markers (的, 了, 着, 啊, 啦, 个, 条 …) often have no English equivalent — drop them. English needs articles, copulas, sometimes pronouns that Chinese implies — add them. Follow each language's natural grammar for function words only. +3. **Synonym latitude on word choice.** "Spear" / "lance" / "polearm" can all be right for 枪 in a wuxia film. Pick the one that reads cleanly in English while staying faithful to the Chinese. +4. **Preserve Chinese word order when grammatically tolerable in English.** Only rearrange when the literal order is genuinely incomprehensible. +5. **Conciseness.** Subtitles must fit on screen. Use the shortest natural English phrasing that captures the Chinese content. + +## Hard rules + +- **No Chinese characters in your English output, ever.** If you can't translate a term, output `[?]` so Python can flag it for human review. Do not pass Hanzi through. +- **One English line.** No newlines unless the Chinese itself has a newline mid-cue. +- **No quotes around your output.** No `"like this"`. +- **No labels.** Do not prefix `English:`, `Translation:`, `Subtitle:`, etc. +- **No explanation.** No "this translates as..." or "in this context...". Just the English line. +- **Append `/no_think` to your reply if you would otherwise produce reasoning.** If you reason internally, your `content` field comes back empty and Python ignores everything. + +## Chinese names (the generic rule) + +Chinese names — people, places, sects, weapons, dynasties — are rendered in the English row as **bare pinyin with tone marks stripped, syllable spacing preserved, capitals on the first letter of each name part.** Same syllable structure as the pinyin row, just without the diacritics. + +Examples: + +| Chinese | Pinyin row | English row | +|---|---|---| +| 李慕白 | Lǐ Mùbái | Li Mubai | +| 俞秀莲 | Yú Xiùlián | Yu Xiulian | +| 武当 | Wǔdāng | Wudang | +| 青冥剑 | Qīng Míng Jiàn | Qingming Sword | + +If the prompt includes a `Known names:` hint for this cue, **use that English form verbatim** — Python has already applied the rule and may have a film-specific spelling that overrides the default. The hint is canonical for this cue. + +If no `Known names:` hint is given, generate the English name yourself using the rule above. Use the `Pinyin row:` hint in the prompt to get the syllable structure right. + +## Glossary hints + +If the prompt includes a `Glossary:` section, those are film-context or wuxia-context translations PinSub has learned matter. Use the suggested English in your output unless it would make the cue grammatically wrong. The glossary captures cases where the literal Chinese-English dictionary is misleading: + +- `枪` in a wuxia film = **spear**, not gun. +- `师娘` = **Master's wife** (wife of one's martial arts teacher), not "Madam Teacher." +- `镖局` = **security agency** (Qing-era courier/escort outfit), not just "agency." + +You also see `Word-for-word target:` in some prompts — that's the literal dictionary gloss Python built. It's stilted but it's the structural skeleton. Match its content; smooth its English. + +## Quirks you have done before — stop doing them + +Python detects these in your output and will re-ask you with explicit feedback. Avoid them on round 1: + +1. 
**You sometimes leave a Hanzi character untranslated in your English** (e.g., output `"Yes! How's the 镖局 doing business?"`). NEVER leave Hanzi in the English row. If a term is in the `Glossary:` hint, use that. If not, attempt your best English equivalent. If you genuinely don't know, output `[?]` for that term so Python can flag it. + +2. **You sometimes smush or respace names** (e.g., `Xiulian` when the `Known names:` hint said `Xiu Lian`). Use the exact spacing from the `Known names:` hint character-for-character. If no hint, follow the rule above (capitals on each syllable, no space between syllables of the same name part, space between separate name parts: surname `Li` then given-name `Mubai` = `Li Mubai`). + +3. **You sometimes translate a polysemous Hanzi by its most common dictionary sense rather than its film-context sense** (`枪` → "gun" in a Qing-era wuxia film where it should be "spear"). The `Glossary:` hints exist to prevent this. If the cue's Hanzi contains a glossary key, use the glossary's English. + +4. **You sometimes output reasoning before the answer.** Qwen3 puts thinking in a separate `reasoning_content` field which Python discards. Always include `/no_think` in your reply and produce ONLY the English line. + +5. **You sometimes wrap the answer in quotes or add lead-in labels** (`"Crouching Tiger, Hidden Dragon"` or `English: Crouching Tiger, Hidden Dragon`). Python strips these defensively, but it's cleaner if you don't add them. + +## Output format + +A single English subtitle line. Nothing else. No quotes, no labels, no explanation, no thinking. /no_think diff --git a/glossary.json b/glossary.json new file mode 100644 index 0000000..2c929f5 --- /dev/null +++ b/glossary.json @@ -0,0 +1,112 @@ +{ + "_help": "Global Chinese→English translation hints for PinSub. PinSub loads this file at startup, scans each cue's Hanzi for any key present here, and passes matching entries to TIMMY as 'Glossary:' hints. Add an entry when TIMMY translates a term wrong; the next run honors the lesson. Per-film overrides live under the 'glossary' key in names/.json. 
Entries with underscore-prefixed keys are ignored.",
+
+  "_format": {
+    "<hanzi>": {
+      "english": "the preferred English translation",
+      "context": "one-line note on when/why this matters (audience-facing rationale)",
+      "tags": ["wuxia", "qing-era", "kungfu", "..."]
+    }
+  },
+
+  "枪": {
+    "english": "spear",
+    "context": "in wuxia / Qing-era films this is the long-handled bladed weapon, NOT a firearm",
+    "tags": ["wuxia", "weapon"]
+  },
+  "师娘": {
+    "english": "Master's wife",
+    "context": "wife of one's martial-arts teacher; respectful address in wuxia",
+    "tags": ["wuxia", "kungfu", "kinship"]
+  },
+  "镖局": {
+    "english": "security agency",
+    "context": "Qing-era courier/escort outfit that delivered valuables under armed protection",
+    "tags": ["wuxia", "qing-era", "occupation"]
+  },
+  "镖师": {
+    "english": "escort guard",
+    "context": "a fighter employed by a 镖局",
+    "tags": ["wuxia", "occupation"]
+  },
+  "师父": {
+    "english": "Master",
+    "context": "respectful address for one's martial-arts teacher; addressed in second person",
+    "tags": ["wuxia", "kungfu", "address"]
+  },
+  "师傅": {
+    "english": "Master",
+    "context": "respectful address for a skilled craftsman or teacher; often interchangeable with 师父 in dubbing",
+    "tags": ["wuxia", "kungfu", "address"]
+  },
+  "弟子": {
+    "english": "disciple",
+    "context": "student of a martial-arts master",
+    "tags": ["wuxia", "kungfu", "kinship"]
+  },
+  "侠": {
+    "english": "swordsman",
+    "context": "wuxia / martial-arts hero — broader than 'knight'; 'warrior' is acceptable",
+    "tags": ["wuxia"]
+  },
+  "江湖": {
+    "english": "the martial world",
+    "context": "literally 'rivers and lakes' — the underground world of itinerant fighters / outlaws / sects in wuxia",
+    "tags": ["wuxia"]
+  },
+  "门派": {
+    "english": "sect",
+    "context": "martial-arts school or lineage",
+    "tags": ["wuxia", "kungfu"]
+  },
+  "武林": {
+    "english": "the martial-arts community",
+    "context": "the collective world of martial-arts practitioners and sects",
+    "tags": ["wuxia"]
+  },
+  "功夫": {
+    "english": "kung fu",
+    "context": "the standard romanization; preserve as 'kung fu' rather than 'gongfu' for English-speaking audiences",
+    "tags": ["wuxia", "kungfu"]
+  },
+  "内功": {
+    "english": "internal energy",
+    "context": "Qi-based cultivation skill in wuxia",
+    "tags": ["wuxia"]
+  },
+  "气功": {
+    "english": "qigong",
+    "context": "energy cultivation; preserve as 'qigong' (the standard English loan)",
+    "tags": ["wuxia", "kungfu"]
+  },
+  "闭关": {
+    "english": "go into seclusion",
+    "context": "withdraw to meditate / cultivate in isolation; a specifically wuxia term",
+    "tags": ["wuxia", "practice"]
+  },
+  "修练": {
+    "english": "practice",
+    "context": "ongoing self-cultivation of skill; less literal than 'cultivate'",
+    "tags": ["wuxia"]
+  },
+  "真人": {
+    "english": "Zhenren",
+    "context": "honorific for a Taoist master / accomplished cultivator; preserve as 'Zhenren' or render contextually as 'Master'",
+    "tags": ["wuxia", "address"]
+  },
+  "护法": {
+    "english": "guardian",
+    "context": "in wuxia, a temple/sect protector",
+    "tags": ["wuxia"]
+  },
+  "拜": {
+    "english": "pay respect",
+    "context": "ceremonial bow / formal greeting in wuxia; rarely 'worship' unless in a religious context",
+    "tags": ["wuxia"]
+  },
+  "拜师": {
+    "english": "take as Master",
+    "context": "the ceremony where a student formally enters a master's tutelage",
+    "tags": ["wuxia", "kungfu"]
+  }
+}
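
A minimal sketch (not part of the patch) of how a cue's Hanzi could be scanned against this glossary to build the `Glossary:` hint described in `_help`. PinSub's real prompt-builder is not shown in this diff, so `build_glossary_hint` and the sample cue are illustrative only; the script assumes glossary.json sits in the working directory:

    import json
    from pathlib import Path

    def build_glossary_hint(cue_hanzi: str, glossary: dict[str, dict]) -> str:
        """Substring-scan the cue's Hanzi for glossary keys; emit a 'Glossary:' hint block."""
        hits = {k: v for k, v in glossary.items() if k in cue_hanzi}
        if not hits:
            return ""
        lines = [f"- {k} = {v['english']} ({v['context']})" for k, v in hits.items()]
        return "Glossary:\n" + "\n".join(lines)

    data = json.loads(Path("glossary.json").read_text(encoding="utf-8"))
    glossary = {k: v for k, v in data.items() if not k.startswith("_")}
    print(build_glossary_hint("镖局的生意如何？", glossary))
    # Glossary:
    # - 镖局 = security agency (Qing-era courier/escort outfit that delivered valuables under armed protection)

Substring matching (rather than segmented lookup) is the behavior `_help` describes: any key appearing anywhere in the cue's Hanzi triggers the hint, so multi-character keys like 镖局 match regardless of how the cue would segment.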