From e53e90a1f105538b11dbb66fa103467e4c0405f1 Mon Sep 17 00:00:00 2001
From: agrechenkov <arseniy.grechenkov@cyclelabs.io>
Date: Tue, 16 Jun 2026 09:49:20 -0400
Subject: [PATCH] Retire tkinter chat popup; route Ctrl+Shift+T / Ask / tray to
 web Chat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR 3 of chat→web. Chat is now fully daemon-backed + served in the dashboard
(PR1+PR2), so the standalone tkinter popup is removed:

- Hotkeys/tray → web: Ctrl+Shift+T (chatHk) and the tray "Open Chat" now open
  the dashboard at #chat via OpenWebDashboard("chat"); Ctrl+Shift+A
  (AskWithSelection) stages the selection (chat_stage_selection) then opens
  #chat, where the Chat tab prefills it. OpenWebDashboard_Impl gained a `tab`
  param that appends the URL hash.
- Removed the popup machinery: deleted scripts/chat_popup.py; dropped LaunchChat_Impl,
  the 52640 socket + chat_send_selection/chat_reload/chat_restart daemon actions
  (+ _chat_launch_argv / ingest-nonce helpers), the ffp-chat.exe arm of the AHK
  bridge + process-kill list + chatScriptPath, and the installer's KillChat
  taskkill. setActiveModel no longer pings the popup (daemon chat reads the model
  live).
- Freeze: dropped ffp-chat from the PyInstaller spec (Analysis/MERGE/PYZ/EXE/
  COLLECT + hiddenimports) and pyproject (py-modules + scripts entry-point) — the
  frozen bundle now ships three exes (ffp-daemon, ffp-grammar-fix, ffp-first-run).

Deleted the obsolete test_chat_popup.py; the daemon action-count test drops from
59 to 56 and asserts the three socket actions are gone. ruff, the 188 tests,
node --check and the AHK parse-check are all green. (installer/README.md still
has two cosmetic ffp-chat.exe mentions — doc-only, follow-up.)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 installer/fastflowprompt.spec |  20 +-
 installer/installer.iss       |   3 -
 pyproject.toml                |   2 -
 scripts/chat_popup.py         | 958 ----------------------------------
 scripts/ffp_daemon.py         | 100 ----
 scripts/grammarFix.ahk        |  10 +-
 scripts/lib/daemon_client.ahk |  24 +-
 scripts/lib/paths.ahk         |   1 -
 scripts/ui/tray.ahk           |   6 +-
 scripts/ui/web/app.js         |   1 -
 tests/test_chat_popup.py      | 132 -----
 tests/test_ffp_daemon.py      |  25 +-
 12 files changed, 24 insertions(+), 1258 deletions(-)
 delete mode 100644 scripts/chat_popup.py
 delete mode 100644 tests/test_chat_popup.py

diff --git a/installer/fastflowprompt.spec b/installer/fastflowprompt.spec
index 7f233f7..209edb2 100644
--- a/installer/fastflowprompt.spec
+++ b/installer/fastflowprompt.spec
@@ -8,12 +8,12 @@ Build:
 
 Output: dist/FastFlowPrompt/  (onedir, ~25 MB after merge dedupe)
 
-Produces four executables sharing one runtime tree:
+Produces three executables sharing one runtime tree (chat is now served by the
+daemon's web dashboard — the standalone chat popup was retired):
 
     dist/FastFlowPrompt/
-    ├── ffp-daemon.exe        windowed, long-running action server
+    ├── ffp-daemon.exe        windowed, long-running action server (+ web chat)
     ├── ffp-grammar-fix.exe   console, AHK subprocess fallback
-    ├── ffp-chat.exe          windowed, chat popup
     ├── ffp-first-run.exe     windowed, first-run wizard
     ├── _internal/            shared Python runtime + dlls + py-modules
     └── setup/defaults/       seed config (read-only)
@@ -66,7 +66,6 @@ HIDDEN_IMPORTS = [
     "loopback_http",
     "paths",
     "grammar_fix",
-    "chat_popup",
     "ffp_daemon",
     "first_run",
     "install",
@@ -83,9 +82,9 @@ DATAS = [
     (os.path.join(SCRIPTS_DIR, "assets"), "assets"),
 ]
 
-# NOTE: do NOT exclude tkinter — chat_popup.py and first_run.py import it, and
-# excluding it ships ffp-chat.exe / ffp-first-run.exe without _tkinter, so they
-# crash at launch. (Regression caught building the installer from this spec.)
+# NOTE: do NOT exclude tkinter — first_run.py (the wizard) imports it; excluding
+# it ships ffp-first-run.exe without _tkinter, so it crashes at launch.
+# (Regression caught building the installer from this spec.)
 EXCLUDES = [
     "test", "unittest", "pydoc", "doctest",
     "lib2to3", "pip", "setuptools", "wheel",
@@ -112,20 +111,17 @@ def _analysis(script_name: str) -> "Analysis":
 
 a_daemon  = _analysis("ffp_daemon.py")
 a_grammar = _analysis("grammar_fix.py")
-a_chat    = _analysis("chat_popup.py")
 a_wizard  = _analysis("first_run.py")
 
-# Dedupe shared deps across all four bundles.
+# Dedupe shared deps across all three bundles.
 MERGE(
     (a_daemon,  "ffp_daemon",  "ffp-daemon"),
     (a_grammar, "grammar_fix", "ffp-grammar-fix"),
-    (a_chat,    "chat_popup",  "ffp-chat"),
     (a_wizard,  "first_run",   "ffp-first-run"),
 )
 
 pyz_daemon  = PYZ(a_daemon.pure,  a_daemon.zipped_data,  cipher=block_cipher)
 pyz_grammar = PYZ(a_grammar.pure, a_grammar.zipped_data, cipher=block_cipher)
-pyz_chat    = PYZ(a_chat.pure,    a_chat.zipped_data,    cipher=block_cipher)
 pyz_wizard  = PYZ(a_wizard.pure,  a_wizard.zipped_data,  cipher=block_cipher)
 
 
@@ -153,13 +149,11 @@ def _exe(pyz, analysis, name, console):
 
 exe_daemon  = _exe(pyz_daemon,  a_daemon,  "ffp-daemon",      console=False)
 exe_grammar = _exe(pyz_grammar, a_grammar, "ffp-grammar-fix", console=True)
-exe_chat    = _exe(pyz_chat,    a_chat,    "ffp-chat",        console=False)
 exe_wizard  = _exe(pyz_wizard,  a_wizard,  "ffp-first-run",   console=False)
 
 COLLECT(
     exe_daemon,  a_daemon.binaries,  a_daemon.zipfiles,  a_daemon.datas,
     exe_grammar, a_grammar.binaries, a_grammar.zipfiles, a_grammar.datas,
-    exe_chat,    a_chat.binaries,    a_chat.zipfiles,    a_chat.datas,
     exe_wizard,  a_wizard.binaries,  a_wizard.zipfiles,  a_wizard.datas,
     strip=False,
     upx=False,
diff --git a/installer/installer.iss b/installer/installer.iss
index dc4b51d..d22c8b3 100644
--- a/installer/installer.iss
+++ b/installer/installer.iss
@@ -14,7 +14,6 @@
 ;     {app}\                            Program Files\FastFlowPrompt (read-only)
 ;       ffp-daemon.exe                 PyInstaller bundle, flattened into {app}
 ;       ffp-grammar-fix.exe
-;       ffp-chat.exe
 ;       ffp-first-run.exe
 ;       _internal\                     shared Python runtime + bundled datas
 ;       ahk\
@@ -174,8 +173,6 @@ Root: HKLM; Subkey: "Software\Microsoft\Windows\CurrentVersion\Run"; \
 ;     won't always trip the close-apps prompt. Kill explicitly.
 Filename: "{sys}\taskkill.exe"; Parameters: "/F /IM ffp-daemon.exe /T"; \
   RunOnceId: "KillDaemon"; Flags: runhidden waituntilterminated
-Filename: "{sys}\taskkill.exe"; Parameters: "/F /IM ffp-chat.exe /T"; \
-  RunOnceId: "KillChat"; Flags: runhidden waituntilterminated
 Filename: "{sys}\taskkill.exe"; \
   Parameters: "/F /IM AutoHotkey64.exe /FI ""WINDOWTITLE eq grammarFix*"""; \
   RunOnceId: "KillAhk"; Flags: runhidden waituntilterminated
diff --git a/pyproject.toml b/pyproject.toml
index 83a8dd0..3cd0c02 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -64,7 +64,6 @@ Issues = "https://github.com/agr77one/Fastflow/issues"
 [project.scripts]
 ffp-daemon = "ffp_daemon:main"
 ffp-grammar-fix = "grammar_fix:main"
-ffp-chat = "chat_popup:main"
 ffp-first-run = "first_run:main"
 ffp-install = "install:main"
 
@@ -93,7 +92,6 @@ py-modules = [
     "loopback_http",
     "paths",
     "grammar_fix",
-    "chat_popup",
     "ffp_daemon",
     "first_run",
     "install",
diff --git a/scripts/chat_popup.py b/scripts/chat_popup.py
deleted file mode 100644
index eb0c869..0000000
--- a/scripts/chat_popup.py
+++ /dev/null
@@ -1,958 +0,0 @@
-"""Modal chat popup for the local FLM server, with multi-tab conversations.
-
-Config is read from the shared `grammar_hotkey.config.json` under the `chat`
-block; `llm_base_url` and `llm_model` fall back to the top-level
-`flm_base_url` / `flm_model` so the chat window always talks to the same
-endpoint as the grammar/prompt hotkeys.
-
-Conversations are organized as tabs (ttk.Notebook). Each tab is a separate
-thread with its own history. Threads are persisted to `chat_threads.jsonl`
-sitting next to this script; the file is rewritten on every save so only the
-latest snapshot per thread is retained (keeps it small while preserving full
-conversation memory across launches).
-
-Stdlib only. Single-instance enforced via a loopback TCP lock.
-"""
-
-from __future__ import annotations
-
-import argparse
-import json
-import logging
-import queue
-import secrets
-import socket
-import subprocess
-import sys
-import threading
-import time
-import tkinter as tk
-import urllib.error
-import urllib.request
-import uuid
-from tkinter import scrolledtext, ttk
-
-import ffp_config
-import loopback_http
-import paths as _paths
-from subprocess_util import NO_WINDOW
-
-log = logging.getLogger("ffp.chat")
-
-SHARED_CONFIG_PATH = _paths.CONFIG_FILE
-DAEMON_BASE_URL = "http://127.0.0.1:52650"
-THREADS_PATH = _paths.CHAT_THREADS_FILE
-INGEST_NONCE_PATH = _paths.DATA_DIR / ".chat_ingest_nonce"
-MAX_LOADED_THREADS = 20
-TITLE_MAX_CHARS = 24
-
-DEFAULTS = {
-    "llm_base_url": "http://127.0.0.1:52625",
-    "llm_model": "qwen3.5:4b",
-    "llm_auth_bearer": "flm",
-    "request_timeout_seconds": 240,
-    "temperature": 0.3,
-    "max_tokens": 1024,
-    "context_window_turns": 12,
-    "system_prompt": "You are a concise, helpful local assistant.",
-    "window": {
-        "title": "Local LLM Chat",
-        "width": 640,
-        "height": 600,
-        "topmost": True,
-        "single_instance_port": 52640,
-    },
-}
-
-
-def build_notes_context_message(query: str, search_fn, max_notes: int = 4) -> tuple[str | None, list[str]]:
-    """Build the retrieval-injection system message for notes mode.
-
-    Runs the ranked vault search and formats the top hits as grounding context
-    (the model is told to cite note titles in [brackets]). Returns
-    (message, titles); (None, []) when nothing matched so the caller can fall
-    back to a plain turn. Pure given search_fn — unit-testable without a vault.
-    """
-    try:
-        found = search_fn(query, max_notes)
-    except Exception:
-        return None, []
-    results = (found or {}).get("results") or []
-    if not results:
-        return None, []
-    titles: list[str] = []
-    blocks: list[str] = []
-    for r in results:
-        title = str(r.get("title") or "untitled")
-        titles.append(title)
-        category = str(r.get("category") or "")
-        snippet = str(r.get("snippet") or "").strip()
-        blocks.append(f"[{title}] ({category})\n{snippet}")
-    message = (
-        "The user has a personal notes vault. The saved notes below are "
-        "relevant to their next message. Ground your answer in these notes and "
-        "cite note titles in [brackets] when you use them. If the notes do not "
-        "answer the question, say so briefly instead of guessing.\n\n"
-        + "\n\n".join(blocks)
-    )
-    return message, titles
-
-
-def retrieve_notes_context(query: str, max_notes: int = 4) -> tuple[str | None, list[str]]:
-    """Vault-backed wrapper around build_notes_context_message. The notes
-    module (which pulls in grammar_fix/config) is imported lazily so chat
-    startup doesn't pay for it when notes mode is never used."""
-    try:
-        import notes
-    except Exception:
-        return None, []
-    return build_notes_context_message(query, notes.search_notes, max_notes)
-
-
-def load_config() -> dict:
-    """Merge the shared config's `chat` block over DEFAULTS.
-
-    Endpoint + model always come from top-level ``flm_*`` keys (same source as
-    the grammar hotkeys and dashboard). The ``chat`` block cannot override them.
-    """
-    cfg = json.loads(json.dumps(DEFAULTS))
-    shared: dict = {}
-    if SHARED_CONFIG_PATH.exists():
-        try:
-            shared = json.loads(SHARED_CONFIG_PATH.read_text(encoding="utf-8"))
-        except Exception as exc:
-            log.warning("failed to read shared config %s: %s", SHARED_CONFIG_PATH, exc)
-            shared = {}
-
-    chat_block = dict((shared.get("chat") or {}) if isinstance(shared, dict) else {})
-    # Never let a stale chat.llm_* shadow the live flm_* selection.
-    chat_block.pop("llm_model", None)
-    chat_block.pop("llm_base_url", None)
-
-    for k, v in chat_block.items():
-        if isinstance(v, dict) and isinstance(cfg.get(k), dict):
-            cfg[k].update(v)
-        else:
-            cfg[k] = v
-
-    llm_block = shared.get("llm") if isinstance(shared.get("llm"), dict) else {}
-    cfg["llm_model"] = str(
-        llm_block.get("model") or shared.get("flm_model") or cfg.get("llm_model") or DEFAULTS["llm_model"]
-    ).strip()
-    raw_url = str(
-        llm_block.get("base_url") or shared.get("flm_base_url") or cfg.get("llm_base_url") or DEFAULTS["llm_base_url"]
-    ).strip()
-    cfg["llm_auth_bearer"] = str(
-        llm_block.get("auth_bearer") or cfg.get("llm_auth_bearer") or DEFAULTS["llm_auth_bearer"]
-    ).strip()
-    try:
-        cfg["llm_base_url"] = ffp_config.validate_flm_base_url(raw_url)
-    except ValueError as exc:
-        log.warning("invalid chat llm_base_url, using default: %s", exc)
-        cfg["llm_base_url"] = DEFAULTS["llm_base_url"]
-    return _overlay_live_flm_settings(cfg)
-
-
-def _overlay_live_flm_settings(cfg: dict) -> dict:
-    """Prefer the daemon's in-memory config (same source as the dashboard)."""
-    try:
-        payload = loopback_http.json_post(
-            DAEMON_BASE_URL + "/action/config_snapshot",
-            {"args": {}},
-            headers=loopback_http.daemon_headers(),
-            timeout=2.0,
-        )
-        if payload.get("ok") and isinstance(payload.get("result"), dict):
-            live = payload["result"]
-            model = str(live.get("flm_model") or "").strip()
-            url = str(live.get("flm_base_url") or "").strip()
-            llm_block = live.get("llm") if isinstance(live.get("llm"), dict) else {}
-            model = str(llm_block.get("model") or model).strip()
-            url = str(llm_block.get("base_url") or url).strip()
-            bearer = str(llm_block.get("auth_bearer") or "").strip()
-            if model:
-                cfg["llm_model"] = model
-            if url:
-                cfg["llm_base_url"] = ffp_config.validate_flm_base_url(url)
-            if bearer:
-                cfg["llm_auth_bearer"] = bearer
-    except Exception as exc:
-        log.debug("daemon config_snapshot unavailable, using file config: %s", exc)
-    return cfg
-
-
-# ---------- Thread persistence -------------------------------------------------------
-
-def _now_iso() -> str:
-    return time.strftime("%Y-%m-%dT%H:%M:%S")
-
-
-def load_threads() -> list[dict]:
-    """Read chat_threads.jsonl, return latest snapshot per thread, newest first."""
-    if not THREADS_PATH.exists():
-        return []
-    latest: dict[str, dict] = {}
-    try:
-        with THREADS_PATH.open("r", encoding="utf-8", errors="replace") as f:
-            for raw in f:
-                raw = raw.strip()
-                if not raw:
-                    continue
-                try:
-                    row = json.loads(raw)
-                except Exception:
-                    continue
-                tid = row.get("thread_id")
-                if not tid:
-                    continue
-                prev = latest.get(tid)
-                if (prev is None) or (str(row.get("updated_at") or "") >= str(prev.get("updated_at") or "")):
-                    latest[tid] = row
-    except Exception:
-        return []
-    ordered = sorted(latest.values(), key=lambda r: str(r.get("updated_at") or ""), reverse=True)
-    return ordered[:MAX_LOADED_THREADS]
-
-
-def save_threads(threads: list[dict]) -> None:
-    """Compact-rewrite: one line per thread, atomic via tmp+replace."""
-    try:
-        tmp = THREADS_PATH.with_suffix(".jsonl.tmp")
-        with tmp.open("w", encoding="utf-8") as f:
-            for t in threads:
-                f.write(json.dumps(t, ensure_ascii=False) + "\n")
-        tmp.replace(THREADS_PATH)
-    except Exception as exc:
-        log.warning("failed to save chat threads: %s", exc)
-
-
-# ---------- Single-instance guard ----------------------------------------------------
-
-def _ensure_ingest_nonce() -> str:
-    """Publish a per-instance nonce so only the daemon can inject selections."""
-    nonce = secrets.token_hex(16)
-    try:
-        _paths.DATA_DIR.mkdir(parents=True, exist_ok=True)
-        INGEST_NONCE_PATH.write_text(nonce, encoding="utf-8")
-    except OSError as exc:
-        log.warning("failed to write ingest nonce: %s", exc)
-    return nonce
-
-
-def try_acquire_single_instance(port: int) -> socket.socket | None:
-    """Bind a loopback port. Return the socket on success, None if another instance owns it."""
-    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    try:
-        s.bind(("127.0.0.1", port))
-        s.listen(4)
-        s.setblocking(False)
-        return s
-    except OSError:
-        s.close()
-        return None
-
-
-def ping_existing_instance(port: int) -> bool:
-    """Ask the running instance to reload config and surface its window."""
-    try:
-        with socket.create_connection(("127.0.0.1", port), timeout=0.5) as c:
-            c.sendall(b"RELOAD\nSHOW\n")
-        return True
-    except OSError:
-        return False
-
-
-# ---------- LLM client ---------------------------------------------------------------
-
-class LLMClient:
-    """Minimal OpenAI-compatible chat client for the local FLM endpoint."""
-
-    def __init__(self, cfg: dict):
-        self.base_url = str(cfg["llm_base_url"]).rstrip("/")
-        self.model = str(cfg["llm_model"])
-        self.bearer = str(cfg.get("llm_auth_bearer") or "")
-        self.timeout = int(cfg.get("request_timeout_seconds", 240))
-        self.temperature = float(cfg.get("temperature", 0.3))
-        self.max_tokens = int(cfg.get("max_tokens", 1024))
-
-    def chat(self, messages: list[dict]) -> str:
-        """POST /v1/chat/completions and return the assistant content. Raises
-        RuntimeError on transport, timeout, parse, or empty-choices errors."""
-        body = json.dumps({
-            "model": self.model,
-            "messages": messages,
-            "temperature": self.temperature,
-            "max_tokens": self.max_tokens,
-            "stream": False,
-        }).encode("utf-8")
-        headers = {"Content-Type": "application/json"}
-        if self.bearer:
-            headers["Authorization"] = f"Bearer {self.bearer}"
-        req = urllib.request.Request(
-            self.base_url + "/v1/chat/completions",
-            data=body, headers=headers, method="POST",
-        )
-        try:
-            with urllib.request.urlopen(req, timeout=self.timeout) as resp:
-                payload = json.loads(resp.read().decode("utf-8", errors="replace"))
-        except urllib.error.URLError as e:
-            raise RuntimeError(f"LLM unreachable at {self.base_url}: {e.reason}") from e
-        except TimeoutError:
-            raise RuntimeError(f"LLM timed out after {self.timeout}s")
-        except json.JSONDecodeError as e:
-            raise RuntimeError(f"Malformed LLM response: {e}") from e
-
-        choices = payload.get("choices") or []
-        if not choices:
-            raise RuntimeError("LLM returned no choices.")
-        msg = choices[0].get("message") or {}
-        return str(msg.get("content") or "").strip()
-
-
-# ---------- Conversation tab ---------------------------------------------------------
-
-class ConversationTab:
-    """One tab = one thread. Each tab owns its transcript widget, input,
-    history list, and thread_id. Tabs are independent contexts."""
-
-    def __init__(self, app: ChatApp, thread: dict | None = None):
-        thread = thread or {}
-        self.app = app
-        self.thread_id = thread.get("thread_id") or uuid.uuid4().hex
-        self.history: list[dict] = list(thread.get("history") or [])
-        self.title = str(thread.get("title") or "New chat")
-        self.inflight = False
-
-        self.frame = ttk.Frame(app.notebook)
-        self._build()
-        if self.history:
-            for msg in self.history:
-                tag = "user" if msg.get("role") == "user" else "assistant"
-                prefix = "You: " if tag == "user" else "LLM: "
-                self._append(tag, f"{prefix}{msg.get('content','')}\n")
-        else:
-            self._append("meta", "Ctrl+T new tab • Ctrl+W close tab • Enter sends • Shift+Enter newline\n")
-
-    def _build(self) -> None:
-        outer = ttk.Frame(self.frame, padding=6)
-        outer.pack(fill="both", expand=True)
-
-        self.transcript = scrolledtext.ScrolledText(
-            outer, wrap="word", state="disabled", height=18,
-            font=("Segoe UI", 10),
-        )
-        self.transcript.pack(fill="both", expand=True)
-        self.transcript.tag_configure("user", foreground="#1a4fb3", font=("Segoe UI", 10, "bold"))
-        self.transcript.tag_configure("assistant", foreground="#0a6b3a")
-        self.transcript.tag_configure("error", foreground="#a8201a")
-        self.transcript.tag_configure("meta", foreground="#777777", font=("Segoe UI", 9, "italic"))
-
-        self.input = tk.Text(outer, height=4, wrap="word", font=("Segoe UI", 10))
-        self.input.pack(fill="x", pady=(8, 4))
-
-        # Picker bar — only shown when a selection is ingested via Ctrl+Shift+A.
-        # Hidden by default to keep the regular chat UI clean.
-        self.picker_frame = ttk.Frame(outer)
-        self.picker_visible = False
-
-        bar = ttk.Frame(outer)
-        bar.pack(fill="x")
-        # Notes mode: ground the next replies in vault notes retrieved for each
-        # message (per-call context injection — the retrieved text is never
-        # written into the thread history). See SPEC V37 / T31.
-        self.notes_mode = tk.BooleanVar(value=False)
-        ttk.Checkbutton(bar, text="📚 My notes", variable=self.notes_mode).pack(side="left")
-        ttk.Button(bar, text="Clear thread", command=self.on_clear).pack(side="right", padx=(4, 0))
-        self.send_btn = ttk.Button(bar, text="Send  (Enter)", command=self.on_send)
-        self.send_btn.pack(side="right")
-
-        self.input.bind("<Return>", self._on_enter)
-
-    def ingest_selection(self, text: str, source_app: str = "") -> None:
-        """Display a selection as a quoted context block + show the action picker.
-        Called when the user pressed Ctrl+Shift+A in another app."""
-        text = (text or "").strip()
-        if not text:
-            return
-        # Truncate for transcript display only — full text goes into the question.
-        preview = text if len(text) <= 1200 else text[:1200] + " …(truncated for display)"
-        quoted = "\n".join("> " + ln for ln in preview.splitlines() or [""])
-        hdr = f"📥 Ingested selection from {source_app or 'app'} ({len(text)} chars):\n"
-        self._append("meta", hdr + quoted + "\n\n")
-        # Keep the full text around so picker buttons can build the prompt.
-        self._ingested_text = text
-        self._show_picker()
-        # Give a short tab title hint.
-        if self.title == "New chat":
-            preview_title = text.splitlines()[0][:48] if text.splitlines() else text[:48]
-            self.title = "Ask: " + preview_title + ("…" if len(preview_title) >= 48 else "")
-            self.app.rename_tab(self, self.title)
-
-    def _show_picker(self) -> None:
-        for child in self.picker_frame.winfo_children():
-            child.destroy()
-        ttk.Label(self.picker_frame, text="Quick action:").pack(side="left", padx=(0, 6))
-        for label, prompt in [
-            ("Summarize", "Summarize the quoted text above as 3 bullet points."),
-            ("Explain", "Explain the quoted text above in plain English. Call out one non-obvious edge case if any."),
-            ("Improve", "Rewrite the quoted text above to be clearer and more concise. Preserve meaning."),
-        ]:
-            ttk.Button(
-                self.picker_frame, text=label,
-                command=lambda p=prompt: self._picker_send(p),
-            ).pack(side="left", padx=2)
-        ttk.Button(
-            self.picker_frame, text="Ask…",
-            command=self._picker_focus,
-        ).pack(side="left", padx=(8, 0))
-        if not self.picker_visible:
-            self.picker_frame.pack(fill="x", pady=(4, 0), before=self.input)
-            self.picker_visible = True
-
-    def _picker_send(self, prompt: str) -> None:
-        # Compose: action prompt + the original ingested selection as a quoted block.
-        ingested = getattr(self, "_ingested_text", "") or ""
-        quoted = "\n".join("> " + ln for ln in ingested.splitlines() or [""])
-        full = f"{prompt}\n\n{quoted}" if ingested else prompt
-        self.input.delete("1.0", "end")
-        self.input.insert("1.0", full)
-        self.on_send()
-        self._hide_picker()
-
-    def _picker_focus(self) -> None:
-        self.focus_input()
-        self._hide_picker()
-
-    def _hide_picker(self) -> None:
-        if self.picker_visible:
-            try:
-                self.picker_frame.pack_forget()
-            except tk.TclError:
-                pass
-            self.picker_visible = False
-
-    def focus_input(self) -> None:
-        try:
-            self.input.focus_set()
-        except tk.TclError:
-            pass
-
-    def _on_enter(self, _event):
-        self.on_send()
-        return "break"
-
-    def on_send(self) -> None:
-        if self.inflight:
-            return
-        self.app.reload_runtime_config()
-        text = self.input.get("1.0", "end").strip()
-        if not text:
-            return
-        self.input.delete("1.0", "end")
-        self._append("user", f"You: {text}\n")
-        self.history.append({"role": "user", "content": text})
-        if self.title == "New chat":
-            self.title = (text[:TITLE_MAX_CHARS] + "…") if len(text) > TITLE_MAX_CHARS else text
-            self.app.rename_tab(self, self.title)
-        self._set_busy(True)
-        threading.Thread(target=self._worker, args=(list(self.history),), daemon=True).start()
-
-    def _worker(self, history_snapshot: list[dict]) -> None:
-        messages: list[dict] = []
-        if self.app.system_prompt:
-            messages.append({"role": "system", "content": self.app.system_prompt})
-        if self.notes_mode.get() and history_snapshot:
-            query = str(history_snapshot[-1].get("content") or "")
-            context, titles = retrieve_notes_context(query)
-            if context:
-                messages.append({"role": "system", "content": context})
-                self.app.root.after(0, self._append, "meta",
-                                    "📚 grounded on: " + ", ".join(titles) + "\n")
-            else:
-                self.app.root.after(0, self._append, "meta",
-                                    "📚 no matching notes — answering without vault context\n")
-        # Sliding window: keep last N turn-pairs (one turn = user + assistant).
-        # Prevents prompt growth that makes later turns increasingly slow.
-        n_turns = self.app.context_window_turns
-        if n_turns > 0:
-            messages.extend(history_snapshot[-(n_turns * 2):])
-        else:
-            messages.extend(history_snapshot)
-        try:
-            reply = self.app.client.chat(messages)
-            self.app.root.after(0, self._on_reply, reply, None)
-        except Exception as e:
-            self.app.root.after(0, self._on_reply, None, str(e))
-
-    def _on_reply(self, reply: str | None, err: str | None) -> None:
-        if err:
-            self._append("error", f"[error] {err}\n")
-        else:
-            self.history.append({"role": "assistant", "content": reply or ""})
-            self._append("assistant", f"LLM: {reply}\n")
-            self.app.persist()
-        self._set_busy(False)
-        self.focus_input()
-
-    def on_clear(self) -> None:
-        """Reset this tab to a fresh context. Same thread_id (so persisted
-        snapshot is overwritten on next save) but empty history."""
-        self.history.clear()
-        self.title = "New chat"
-        self.app.rename_tab(self, self.title)
-        self.transcript.configure(state="normal")
-        self.transcript.delete("1.0", "end")
-        self.transcript.configure(state="disabled")
-        self._append("meta", "— thread cleared —\n")
-        self.app.persist()
-
-    def _append(self, tag: str, text: str) -> None:
-        self.transcript.configure(state="normal")
-        self.transcript.insert("end", text, tag)
-        self.transcript.see("end")
-        self.transcript.configure(state="disabled")
-
-    def _set_busy(self, busy: bool) -> None:
-        self.inflight = busy
-        self.send_btn.configure(state=("disabled" if busy else "normal"),
-                                text=("…thinking" if busy else "Send  (Enter)"))
-
-    def to_record(self) -> dict:
-        return {
-            "thread_id": self.thread_id,
-            "title": self.title,
-            "updated_at": _now_iso(),
-            "history": list(self.history),
-        }
-
-
-# ---------- App ---------------------------------------------------------------------
-
-class ChatApp:
-    """Top-level window owning the Notebook, the LLM client, and the
-    single-instance listener."""
-
-    def __init__(self, cfg: dict, instance_sock: socket.socket):
-        self.cfg = cfg
-        self.client = LLMClient(cfg)
-        self.system_prompt = str(cfg.get("system_prompt") or "")
-        self.context_window_turns = max(0, int(cfg.get("context_window_turns") or 0))
-        self.instance_sock = instance_sock
-        self.ingest_nonce = _ensure_ingest_nonce()
-        self._accept_pause = threading.Event()
-        self.show_q: queue.Queue[str] = queue.Queue()
-        self.tabs: list[ConversationTab] = []
-
-        win = cfg.get("window") or {}
-        self.root = tk.Tk()
-        self.root.title(str(win.get("title", "Local LLM Chat")))
-        w, h = int(win.get("width", 640)), int(win.get("height", 600))
-        self.root.geometry(f"{w}x{h}")
-        self.root.minsize(420, 360)
-        if win.get("topmost", True):
-            self.root.attributes("-topmost", True)
-        # X closes the chat process so reopening always picks up the active model.
-        self.root.protocol("WM_DELETE_WINDOW", self.on_quit)
-
-        self._build_ui()
-        self._bind_keys()
-        self._restore_threads()
-        self._start_instance_listener()
-        self._poll_show_queue()
-
-    def _build_ui(self) -> None:
-        top = ttk.Frame(self.root, padding=(8, 8, 8, 0))
-        top.pack(fill="x")
-        self.status_label = ttk.Label(
-            top,
-            text=f"model: {self.client.model} @ {self.client.base_url}",
-            foreground="#666",
-        )
-        self.status_label.pack(side="left")
-        ttk.Button(top, text="× Close tab", command=self.close_current_tab).pack(side="right", padx=(4, 0))
-        ttk.Button(top, text="History…", command=self.open_history_picker).pack(side="right", padx=(4, 0))
-        ttk.Button(top, text="+ New chat", command=self.new_tab).pack(side="right")
-
-        self.notebook = ttk.Notebook(self.root)
-        self.notebook.pack(fill="both", expand=True, padx=8, pady=8)
-        self.notebook.enable_traversal()
-        self.notebook.bind("<<NotebookTabChanged>>", lambda e: self._focus_current())
-
-    def _bind_keys(self) -> None:
-        # bind_all (not bind) so the keystroke fires even when focus is inside
-        # a Text widget — Text has built-in Ctrl-letter bindings that would
-        # otherwise consume the event. Handlers return "break" to stop the
-        # Text widget from also processing the key.
-        def wrap(fn):
-            def handler(_event):
-                fn()
-                return "break"
-            return handler
-
-        self.root.bind_all("<Escape>", wrap(self.on_hide))
-        self.root.bind_all("<Control-q>", wrap(self.on_quit))
-        self.root.bind_all("<Control-Q>", wrap(self.on_quit))
-        self.root.bind_all("<Control-t>", wrap(self.new_tab))
-        self.root.bind_all("<Control-T>", wrap(self.new_tab))
-        self.root.bind_all("<Control-w>", wrap(self.close_current_tab))
-        self.root.bind_all("<Control-W>", wrap(self.close_current_tab))
-        self.root.bind_all("<Control-Tab>", wrap(lambda: self._cycle_tab(1)))
-        self.root.bind_all("<Control-Shift-Tab>", wrap(lambda: self._cycle_tab(-1)))
-        # Some Windows Tk builds report shift-tab as ISO_Left_Tab.
-        self.root.bind_all("<Control-ISO_Left_Tab>", wrap(lambda: self._cycle_tab(-1)))
-
-    def _cycle_tab(self, delta: int) -> None:
-        if not self.tabs:
-            return
-        count = self.notebook.index("end")
-        if count <= 0:
-            return
-        current_idx = self.notebook.index(self.notebook.select())
-        self.notebook.select((current_idx + delta) % count)
-
-    def _restore_threads(self) -> None:
-        # Always start with a fresh tab. Past threads remain on disk and are
-        # reachable via the "History…" picker.
-        self.new_tab()
-
-    def _add_tab(self, tab: ConversationTab) -> None:
-        self.tabs.append(tab)
-        self.notebook.add(tab.frame, text=tab.title)
-
-    def new_tab(self) -> None:
-        tab = ConversationTab(self, None)
-        self._add_tab(tab)
-        self.notebook.select(self.notebook.index("end") - 1)
-        tab.focus_input()
-
-    def close_current_tab(self) -> None:
-        if not self.tabs:
-            return
-        idx = self.notebook.index(self.notebook.select())
-        tab = self.tabs.pop(idx)
-        self.notebook.forget(idx)
-        tab.frame.destroy()
-        if not self.tabs:
-            self.new_tab()
-
-    def rename_tab(self, tab: ConversationTab, title: str) -> None:
-        try:
-            idx = self.tabs.index(tab)
-        except ValueError:
-            return
-        self.notebook.tab(idx, text=title)
-
-    def persist(self) -> None:
-        """Merge currently-open tabs with what's on disk so threads not
-        currently loaded in a tab are preserved."""
-        merged: dict[str, dict] = {}
-        for t in load_threads():
-            tid = t.get("thread_id")
-            if tid:
-                merged[tid] = t
-        for tab in self.tabs:
-            # Skip empty new-chat tabs the user never typed into.
-            if not tab.history:
-                continue
-            rec = tab.to_record()
-            merged[rec["thread_id"]] = rec
-        ordered = sorted(merged.values(), key=lambda r: str(r.get("updated_at") or ""), reverse=True)
-        save_threads(ordered)
-
-    def open_history_picker(self) -> None:
-        threads = load_threads()
-        # Hide any thread already open in a tab to avoid duplicate opens.
-        open_ids = {t.thread_id for t in self.tabs}
-        available = [t for t in threads if t.get("thread_id") not in open_ids]
-
-        dlg = tk.Toplevel(self.root)
-        dlg.title("Chat history")
-        dlg.transient(self.root)
-        dlg.geometry("520x380")
-        try:
-            dlg.attributes("-topmost", True)
-        except tk.TclError:
-            pass
-
-        ttk.Label(dlg, text=f"{len(available)} saved thread(s). Double-click or Open to reopen.",
-                  foreground="#666").pack(fill="x", padx=10, pady=(10, 4))
-
-        listframe = ttk.Frame(dlg)
-        listframe.pack(fill="both", expand=True, padx=10, pady=4)
-        scrollbar = ttk.Scrollbar(listframe, orient="vertical")
-        listbox = tk.Listbox(listframe, yscrollcommand=scrollbar.set, activestyle="dotbox",
-                             font=("Segoe UI", 10))
-        scrollbar.config(command=listbox.yview)
-        scrollbar.pack(side="right", fill="y")
-        listbox.pack(side="left", fill="both", expand=True)
-        for t in available:
-            updated = str(t.get("updated_at") or "")[:19]
-            title = str(t.get("title") or "(untitled)")
-            msgs = len(t.get("history") or [])
-            listbox.insert("end", f"{updated}  •  {title}  ({msgs} msg)")
-
-        def reopen():
-            sel = listbox.curselection()
-            if not sel:
-                return
-            tab = ConversationTab(self, available[sel[0]])
-            self._add_tab(tab)
-            self.notebook.select(self.notebook.index("end") - 1)
-            tab.focus_input()
-            dlg.destroy()
-
-        def delete():
-            sel = listbox.curselection()
-            if not sel:
-                return
-            removed = available.pop(sel[0])
-            listbox.delete(sel[0])
-            remaining = [r for r in load_threads() if r.get("thread_id") != removed.get("thread_id")]
-            save_threads(remaining)
-
-        bar = ttk.Frame(dlg)
-        bar.pack(fill="x", padx=10, pady=(4, 10))
-        ttk.Button(bar, text="Open", command=reopen).pack(side="right", padx=(4, 0))
-        ttk.Button(bar, text="Delete", command=delete).pack(side="right", padx=(4, 0))
-        ttk.Button(bar, text="Close", command=dlg.destroy).pack(side="right")
-
-        listbox.bind("<Double-Button-1>", lambda e: reopen())
-        listbox.bind("<Return>", lambda e: reopen())
-        dlg.bind("<Escape>", lambda e: dlg.destroy())
-        if available:
-            listbox.selection_set(0)
-            listbox.focus_set()
-
-    def _focus_current(self) -> None:
-        try:
-            idx = self.notebook.index(self.notebook.select())
-            self.tabs[idx].focus_input()
-        except (tk.TclError, IndexError):
-            pass
-
-    def on_hide(self) -> None:
-        try:
-            self.root.withdraw()
-        except tk.TclError:
-            pass
-
-    def on_quit(self) -> None:
-        self.persist()
-        try:
-            self.instance_sock.close()
-        except Exception:
-            pass
-        self.root.destroy()
-
-    def _apply_runtime_config(self, cfg: dict) -> None:
-        self.cfg = cfg
-        self.client = LLMClient(cfg)
-        self.system_prompt = str(cfg.get("system_prompt") or "")
-        self.context_window_turns = max(0, int(cfg.get("context_window_turns") or 0))
-        if hasattr(self, "status_label"):
-            self.status_label.configure(
-                text=f"model: {self.client.model} @ {self.client.base_url}"
-            )
-
-    def reload_runtime_config(self, *, min_interval: float = 0.0) -> None:
-        """Re-read shared config and refresh the status bar + LLM client."""
-        now = time.monotonic()
-        last = getattr(self, "_last_cfg_reload", 0.0)
-        if min_interval > 0 and (now - last) < min_interval:
-            return
-        self._last_cfg_reload = now
-        self._apply_runtime_config(load_config())
-
-    def show(self) -> None:
-        self.reload_runtime_config()
-        self.root.deiconify()
-        self.root.lift()
-        self.root.focus_force()
-        self._focus_current()
-
-    def _start_instance_listener(self) -> None:
-        def loop():
-            while True:
-                try:
-                    conn, _ = self.instance_sock.accept()
-                except BlockingIOError:
-                    self._accept_pause.wait(0.15)
-                    continue
-                except OSError:
-                    return
-                try:
-                    # Read up to 64 KiB so an ingest payload with a large
-                    # selection fits comfortably. Legacy "SHOW\n" still works.
-                    raw = conn.recv(65536)
-                finally:
-                    try:
-                        conn.close()
-                    except OSError:
-                        pass
-                self._dispatch_message(raw)
-        threading.Thread(target=loop, daemon=True).start()
-
-    def _dispatch_message(self, raw: bytes) -> None:
-        """Parse wire messages: JSON ingest payloads, or line-based RELOAD/SHOW."""
-        text = (raw or b"").strip().decode("utf-8", errors="replace")
-        if not text:
-            return
-        if text.startswith("{"):
-            try:
-                msg = json.loads(text)
-            except Exception:
-                msg = None
-            if isinstance(msg, dict) and msg.get("type") == "ingest":
-                if str(msg.get("nonce") or "") != self.ingest_nonce:
-                    log.warning("rejected ingest: nonce mismatch")
-                    return
-                self.show_q.put({
-                    "type": "ingest",
-                    "text": str(msg.get("text") or ""),
-                    "source_app": str(msg.get("source_app") or ""),
-                })
-                return
-        handled = False
-        for line in text.splitlines():
-            cmd = line.strip().upper()
-            if cmd == "RELOAD":
-                self.show_q.put("reload")
-                handled = True
-            elif cmd == "QUIT":
-                self.show_q.put("quit")
-                handled = True
-            elif cmd == "SHOW":
-                self.show_q.put("show")
-                handled = True
-        if not handled:
-            self.show_q.put("show")
-
-    def _poll_show_queue(self) -> None:
-        self.reload_runtime_config(min_interval=2.0)
-        try:
-            while True:
-                item = self.show_q.get_nowait()
-                if isinstance(item, dict) and item.get("type") == "ingest":
-                    self._handle_ingest(item.get("text", ""), item.get("source_app", ""))
-                elif item == "quit":
-                    self.on_quit()
-                elif item == "reload":
-                    self.reload_runtime_config()
-                elif item == "show":
-                    self.show()
-                else:
-                    self.show()
-        except queue.Empty:
-            pass
-        self.root.after(150, self._poll_show_queue)
-
-    def _handle_ingest(self, text: str, source_app: str) -> None:
-        """Open a fresh tab with the ingested selection + action picker."""
-        self.new_tab()
-        current = self._current_tab()
-        if current is not None:
-            current.ingest_selection(text, source_app)
-        self.show()
-
-    def _current_tab(self) -> ConversationTab | None:
-        idx = self.notebook.index("current") if self.notebook.tabs() else None
-        if idx is None or idx < 0 or idx >= len(self.tabs):
-            return None
-        return self.tabs[idx]
-
-    def run(self) -> None:
-        self.root.mainloop()
-
-
-def _is_pid_alive(pid: int) -> bool:
-    if pid <= 0:
-        return False
-    try:
-        result = subprocess.run(
-            ["tasklist", "/FI", f"PID eq {pid}"],
-            capture_output=True,
-            text=True,
-            timeout=5,
-            creationflags=NO_WINDOW,
-        )
-    except (OSError, subprocess.TimeoutExpired):
-        return False
-    return str(pid) in ((result.stdout or "") + (result.stderr or ""))
-
-
-def _watch_parent_pid(parent_pid: int, app: ChatApp) -> None:
-    """Exit chat when the launching grammarFix.ahk process goes away.
-
-    Preferred path mirrors ffp_daemon._watch_parent: open a SYNCHRONIZE handle
-    and block on WaitForSingleObject — the kernel signals the instant the
-    parent exits, with zero polling cost. Falls back to the old 5-second
-    tasklist poll only when the WinAPI path is unavailable (spawning tasklist
-    every 5s forever was measurable noise in Process Monitor)."""
-    if parent_pid <= 0:
-        return
-
-    def quit_app() -> None:
-        try:
-            app.root.after(0, app.on_quit)
-        except Exception:
-            pass
-
-    def loop() -> None:
-        try:
-            import ctypes
-            from ctypes import wintypes
-            PROCESS_SYNCHRONIZE = 0x00100000
-            kernel32 = ctypes.WinDLL("kernel32", use_last_error=True)
-            kernel32.OpenProcess.argtypes = [wintypes.DWORD, wintypes.BOOL, wintypes.DWORD]
-            kernel32.OpenProcess.restype = wintypes.HANDLE
-            kernel32.WaitForSingleObject.argtypes = [wintypes.HANDLE, wintypes.DWORD]
-            kernel32.WaitForSingleObject.restype = wintypes.DWORD
-            kernel32.CloseHandle.argtypes = [wintypes.HANDLE]
-
-            handle = kernel32.OpenProcess(PROCESS_SYNCHRONIZE, False, parent_pid)
-            if handle:
-                try:
-                    while True:
-                        # Finite timeout so a daemon thread never sits in an
-                        # uninterruptible FFI call across interpreter exit.
-                        rc = kernel32.WaitForSingleObject(handle, 10000)
-                        if rc == 0:  # WAIT_OBJECT_0 -> parent exited
-                            quit_app()
-                            return
-                        # rc == 258 (WAIT_TIMEOUT) -> keep waiting
-                finally:
-                    kernel32.CloseHandle(handle)
-        except Exception:
-            pass  # fall through to polling
-
-        while True:
-            time.sleep(5)
-            if not _is_pid_alive(parent_pid):
-                quit_app()
-                return
-
-    threading.Thread(target=loop, daemon=True, name="chat-parent-watch").start()
-
-
-def main() -> int:
-    parser = argparse.ArgumentParser(description="Flowkey local LLM chat popup")
-    parser.add_argument("--parent-pid", type=int, default=0,
-                        help="exit when this PID disappears (grammarFix.ahk)")
-    args = parser.parse_args()
-
-    cfg = load_config()
-    port = int((cfg.get("window") or {}).get("single_instance_port", 52640))
-
-    sock = try_acquire_single_instance(port)
-    if sock is None:
-        ping_existing_instance(port)
-        return 0
-
-    app = ChatApp(cfg, sock)
-    _watch_parent_pid(args.parent_pid, app)
-    app.run()
-    return 0
-
-
-if __name__ == "__main__":
-    sys.exit(main())
diff --git a/scripts/ffp_daemon.py b/scripts/ffp_daemon.py
index d8f6aa1..d22bb81 100644
--- a/scripts/ffp_daemon.py
+++ b/scripts/ffp_daemon.py
@@ -21,7 +21,6 @@
 import json
 import logging
 import logging.handlers
-import os
 import socket
 import subprocess
 import sys
@@ -88,15 +87,6 @@ def _popen_logged(name: str, argv: list[str], **kwargs) -> subprocess.Popen:
     return proc
 
 
-def _chat_launch_argv() -> list[str]:
-    parent_arg = ["--parent-pid", str(os.getpid())]
-    if getattr(sys, "frozen", False):
-        chat_exe = Path(sys.executable).with_name("ffp-chat.exe")
-        if chat_exe.exists():
-            return [str(chat_exe), *parent_arg]
-    return [sys.executable, str(HERE / "chat_popup.py"), *parent_arg]
-
-
 HOST = "127.0.0.1"
 DEFAULT_PORT = 52650
 # API_VERSION doubles as the required POST header value (X-FFP-API). The AHK
@@ -567,93 +557,6 @@ def _act_shutdown(_args: dict) -> str:
     return "shutting_down"
 
 
-def _read_chat_ingest_nonce() -> str:
-    """Read the chat instance's ingest nonce (empty when chat is not running)."""
-    nonce_path = _paths.DATA_DIR / ".chat_ingest_nonce"
-    try:
-        if nonce_path.exists():
-            return nonce_path.read_text(encoding="utf-8").strip()
-    except OSError:
-        pass
-    return ""
-
-
-def _build_chat_ingest_payload(text: str, source_app: str) -> bytes:
-    """Build an ingest wire message; nonce is read fresh on every call."""
-    return json.dumps({
-        "type": "ingest",
-        "text": text,
-        "source_app": source_app,
-        "nonce": _read_chat_ingest_nonce(),
-    }, ensure_ascii=False).encode("utf-8")
-
-
-def _act_chat_reload(_args: dict) -> str:
-    """Tell a running chat popup to reload config (model/base_url). Best-effort."""
-    import socket as _sock
-
-    chat_port = 52640
-    try:
-        with _sock.create_connection(("127.0.0.1", chat_port), timeout=0.5) as c:
-            c.sendall(b"RELOAD\n")
-        return "ok"
-    except OSError:
-        return "chat not running"
-
-
-def _act_chat_restart(_args: dict) -> str:
-    """Quit a running chat popup so the next open loads fresh config/model."""
-    import socket as _sock
-
-    chat_port = 52640
-    try:
-        with _sock.create_connection(("127.0.0.1", chat_port), timeout=0.5) as c:
-            c.sendall(b"QUIT\n")
-        return "ok"
-    except OSError:
-        return "chat not running"
-
-
-def _act_chat_send_selection(args: dict) -> dict:
-    """Send a selection to the chat single-instance port (52640) as an
-    ingest payload. If chat isn't running, spawn it first and retry."""
-    import socket as _sock
-
-    text = str(args.get("text") or "")
-    source_app = str(args.get("source_app") or "")
-    if not text:
-        return {"ok": False, "error": "empty selection"}
-
-    chat_port = 52640  # single_instance_port; matches grammar_hotkey.config.example.json
-
-    def _try_send() -> bytes | None:
-        payload = _build_chat_ingest_payload(text, source_app)
-        try:
-            with _sock.create_connection(("127.0.0.1", chat_port), timeout=0.5) as c:
-                c.sendall(payload + b"\n")
-            return payload
-        except OSError:
-            return None
-
-    sent = _try_send()
-    if sent is not None:
-        return {"ok": True, "spawned": False, "bytes": len(sent)}
-
-    # Chat not running — spawn it and wait briefly for the listener to bind.
-    try:
-        _popen_logged("chat_popup_for_ingest", _chat_launch_argv(), cwd=str(HERE))
-    except Exception as e:
-        return {"ok": False, "error": f"chat spawn failed: {e}"}
-
-    for _ in range(20):  # up to ~2s
-        time.sleep(0.1)
-        sent = _try_send()
-        if sent is not None:
-            return {"ok": True, "spawned": True, "bytes": len(sent)}
-
-    return {"ok": False, "error": "chat did not accept ingest after spawn"}
-
-
 # ---- Web-dashboard chat (daemon-backed; replaces the retired popup) ----------
 def _act_chat_threads_list(_args: dict) -> dict:
     import ffp_chat
@@ -742,9 +645,6 @@ def _act_chat_take_staged(_args: dict) -> dict:
     "chat_thread_delete": _act_chat_thread_delete,
     "chat_stage_selection": _act_chat_stage_selection,
     "chat_take_staged": _act_chat_take_staged,
-    "chat_send_selection": _act_chat_send_selection,
-    "chat_reload": _act_chat_reload,
-    "chat_restart": _act_chat_restart,
     "get_autostart_state": _act_get_autostart_state,
     "set_autostart": _act_set_autostart,
     "open_dashboard": _act_open_dashboard,
diff --git a/scripts/grammarFix.ahk b/scripts/grammarFix.ahk
index 21b945c..fb2974c 100644
--- a/scripts/grammarFix.ahk
+++ b/scripts/grammarFix.ahk
@@ -15,7 +15,7 @@ global ffpBusyAction := ""
 ; so we wrap each zero-arg handler in a variadic fat-arrow lambda — this is
 ; the pattern that worked in earlier versions before the Map refactor.
 gramHk := (*) => ProcessSelection()
-chatHk := (*) => LaunchChat()
+chatHk := (*) => OpenWebDashboard("chat")
 noteHk := (*) => CaptureNote()
 askHk  := (*) => AskWithSelection()
 Hotkey("^+g", gramHk)
@@ -56,7 +56,6 @@ dataDir     := runtimePaths["dataDir"]
 logsDir     := runtimePaths["logsDir"]
 
 scriptPath        := runtimePaths["scriptPath"]
-chatScriptPath    := runtimePaths["chatScriptPath"]
 daemonScriptPath  := runtimePaths["daemonScriptPath"]
 configPath        := runtimePaths["configPath"]
 configExamplePath := runtimePaths["configExamplePath"]
@@ -439,8 +438,8 @@ RunCmdExec(cmd) {
     return RunCmdExec_Impl(cmd)
 }
 
-LaunchChat() {
-    return LaunchChat_Impl()
+OpenWebDashboard(tab := "") {
+    return OpenWebDashboard_Impl(tab)
 }
 
 ShutdownFlowkeyChildren(ExitReason := "", ExitCode := "") {
@@ -543,11 +542,12 @@ AskWithSelectionImpl() {
 
     body := '{"args":{"text":"' EscapeJson(captured)
         . '","source_app":"' EscapeJson(sourceApp) '"}}'
-    result := RunActionViaDaemon("chat_send_selection", body)
+    result := RunActionViaDaemon("chat_stage_selection", body)
     if (result = "") {
         Notify("Flowkey", "Ask: daemon unavailable.")
         return
     }
+    OpenWebDashboard("chat")
     Notify("Flowkey", "💬 Sent to chat (" StrLen(captured) " chars).")
 }
 
diff --git a/scripts/lib/daemon_client.ahk b/scripts/lib/daemon_client.ahk
index f3d7b25..b2aab78 100644
--- a/scripts/lib/daemon_client.ahk
+++ b/scripts/lib/daemon_client.ahk
@@ -2,7 +2,7 @@
 global DAEMON_ONLY_ACTIONS := Map(
     "pull_start", 1, "pull_status", 1,
     "bench_start", 1, "bench_status", 1, "bench_history", 1,
-    "chat_send_selection", 1, "chat_reload", 1, "chat_restart", 1, "save_note", 1,
+    "chat_send", 1, "chat_thread_delete", 1, "chat_stage_selection", 1, "save_note", 1,
     "set_autostart", 1, "get_autostart_state", 1,
     "notify", 1, "open_dashboard", 1,
     "flm_update_check", 1, "note_search", 1,
@@ -243,7 +243,7 @@ ResolvePythonwPath_Impl() {
 }
 
 ; --- Entrypoint launching (frozen exe vs dev .py) ---------------------------
-; The four Python entrypoints (grammar_fix, ffp_daemon, chat_popup, first_run)
+; The Python entrypoints (grammar_fix, ffp_daemon, first_run)
 ; ship as frozen exes in an installed build, flattened into the install root.
 ; grammarFix.ahk runs from {app}\scripts, so A_ScriptDir\.. is the install root
 ; and the exe is A_ScriptDir\..\<exeName>. A frozen exe IS its script's
@@ -274,22 +274,6 @@ RunPython_Impl(args) {
     return shell.Exec(Format('"{}" {}', ResolvePythonwPath_Impl(), args))
 }
 
-LaunchChat_Impl() {
-    global chatScriptPath
-    if (FrozenEntrypointExe_Impl("ffp-chat.exe") = "" && !FileExist(chatScriptPath)) {
-        Notify("Flowkey", "Chat entrypoint not found (ffp-chat.exe / chat_popup.py)")
-        return
-    }
-    RunAction("chat_restart")
-    Sleep 200
-    parentPid := ProcessExist()
-    try {
-        Run(EntrypointCmd_Impl("ffp-chat.exe", chatScriptPath, Format('--parent-pid {}', parentPid)), A_ScriptDir, "Hide")
-    } catch as e {
-        Notify("Flowkey", "Chat launch failed: " e.Message)
-    }
-}
-
 ; Graceful + forced cleanup of Flowkey-owned pythonw children on script exit.
 global flowkeyShutdownDone := false
 
@@ -299,7 +283,6 @@ ShutdownFlowkeyChildren_Impl(ExitReason := "", ExitCode := "") {
         return
     flowkeyShutdownDone := true
 
-    try RunAction("chat_restart")
     try RunAction("shutdown")
     Sleep 400
     KillFlowkeyPythonProcesses_Impl()
@@ -315,14 +298,13 @@ KillFlowkeyPythonProcesses_Impl() {
             if (cmd = "" || !InStr(cmd, scriptDir))
                 continue
             if !(InStr(cmd, "ffp_daemon.py")
-                || InStr(cmd, "chat_popup")
                 || InStr(cmd, "grammar_fix.py"))
                 continue
             try ProcessClose(proc.ProcessId)
         }
         ; Production: frozen exes launched from the install root (appDir). The
         ; --parent-pid watchdog already exits them when we die; this is a backstop.
-        for exeName in ["ffp-daemon.exe", "ffp-chat.exe", "ffp-grammar-fix.exe"] {
+        for exeName in ["ffp-daemon.exe", "ffp-grammar-fix.exe"] {
             for proc in ComObjGet("winmgmts:").ExecQuery("SELECT ProcessId, ExecutablePath FROM Win32_Process WHERE Name='" exeName "'") {
                 exePath := proc.ExecutablePath
                 if (exePath = "" || (appDir != "" && !InStr(exePath, appDir)))
diff --git a/scripts/lib/paths.ahk b/scripts/lib/paths.ahk
index 5e55539..d8ca80b 100644
--- a/scripts/lib/paths.ahk
+++ b/scripts/lib/paths.ahk
@@ -53,7 +53,6 @@ BuildRuntimePaths() {
         "dataDir", userRoot "\\data",
         "logsDir", userRoot "\\logs",
         "scriptPath", A_ScriptDir "\\grammar_fix.py",
-        "chatScriptPath", A_ScriptDir "\\chat_popup.py",
         "daemonScriptPath", A_ScriptDir "\\ffp_daemon.py",
         "configPath", userRoot "\\config\\grammar_hotkey.config.json",
         "configExamplePath", ResolveConfigExamplePath(appDir, userRoot),
diff --git a/scripts/ui/tray.ahk b/scripts/ui/tray.ahk
index 8df8d86..513f8ca 100644
--- a/scripts/ui/tray.ahk
+++ b/scripts/ui/tray.ahk
@@ -1,6 +1,6 @@
 SetupTrayMenu_Impl() {
     A_TrayMenu.Delete()
-    A_TrayMenu.Add("Open Chat`tCtrl+Shift+T", (*) => LaunchChat())
+    A_TrayMenu.Add("Open Chat`tCtrl+Shift+T", (*) => OpenWebDashboard_Impl("chat"))
     A_TrayMenu.Add("Dashboard", (*) => OpenWebDashboard_Impl())
     A_TrayMenu.Add()
     A_TrayMenu.Add("Quick toggles", BuildTogglesMenu_Impl())
@@ -98,13 +98,13 @@ BuildServerMenu_Impl() {
     return m
 }
 
-OpenWebDashboard_Impl() {
+OpenWebDashboard_Impl(tab := "") {
     global daemonBaseUrl
     if !EnsureDaemonRunning_Impl() {
         Notify("Flowkey", "Web dashboard: daemon could not be started.")
         return
     }
-    url := daemonBaseUrl "/"
+    url := daemonBaseUrl "/" (tab != "" ? "#" tab : "")
     ; Edge app mode = standalone chromeless window; fall back to the default
     ; browser when Edge is unavailable.
     try Run('msedge.exe --app=' url)
diff --git a/scripts/ui/web/app.js b/scripts/ui/web/app.js
index 5911280..00dcd5c 100644
--- a/scripts/ui/web/app.js
+++ b/scripts/ui/web/app.js
@@ -657,7 +657,6 @@ async function setActiveModel() {
   if (!name) return;
   try {
     await action("apply_config_patch", { patch: { llm: { model: name } } });
-    await action("chat_restart");
     setStatus("config-status", `✅ Active model: ${name}`);
     loadModels();
     loadServerStatus();
diff --git a/tests/test_chat_popup.py b/tests/test_chat_popup.py
deleted file mode 100644
index 91d2b33..0000000
--- a/tests/test_chat_popup.py
+++ /dev/null
@@ -1,132 +0,0 @@
-from __future__ import annotations
-
-import importlib
-import json
-import sys
-
-
-def test_chat_load_config_tracks_flm_model_over_stale_chat_block(isolated_release_root):
-    config_path = isolated_release_root / "config" / "grammar_hotkey.config.json"
-    config_path.write_text(
-        json.dumps(
-            {
-                "flm_model": "new:model",
-                "flm_base_url": "http://127.0.0.1:52625",
-                "chat": {
-                    "llm_model": "stale:old",
-                    "llm_base_url": "http://127.0.0.1:99999",
-                    "temperature": 0.7,
-                },
-            }
-        ),
-        encoding="utf-8",
-    )
-    sys.modules.pop("chat_popup", None)
-    sys.modules.pop("paths", None)
-    chat = importlib.import_module("chat_popup")
-    # Isolated test config — skip live daemon overlay when present.
-    chat._overlay_live_flm_settings = lambda cfg: cfg
-
-    cfg = chat.load_config()
-
-    assert cfg["llm_model"] == "new:model"
-    assert cfg["llm_base_url"] == "http://127.0.0.1:52625"
-    assert cfg["temperature"] == 0.7
-
-
-def _import_chat(isolated_release_root):
-    sys.modules.pop("chat_popup", None)
-    sys.modules.pop("paths", None)
-    return importlib.import_module("chat_popup")
-
-
-def test_notes_context_message_formats_hits_and_titles(isolated_release_root):
-    chat = _import_chat(isolated_release_root)
-
-    def fake_search(query, limit):
-        assert query == "what did I save about claude?"
-        assert limit == 4
-        return {"results": [
-            {"title": "Claude AI Introduction", "category": "research", "snippet": "Claude is a model by Anthropic…"},
-            {"title": "Prompting tips", "category": "work/technical", "snippet": "Use XML tags."},
-        ]}
-
-    msg, titles = chat.build_notes_context_message("what did I save about claude?", fake_search)
-
-    assert titles == ["Claude AI Introduction", "Prompting tips"]
-    assert "[Claude AI Introduction] (research)" in msg
-    assert "Use XML tags." in msg
-    assert "cite note titles" in msg
-
-
-def test_notes_context_message_empty_and_error_fall_back(isolated_release_root):
-    chat = _import_chat(isolated_release_root)
-
-    assert chat.build_notes_context_message("x", lambda q, n: {"results": []}) == (None, [])
-    assert chat.build_notes_context_message("x", lambda q, n: None) == (None, [])
-
-    def boom(q, n):
-        raise RuntimeError("vault offline")
-
-    assert chat.build_notes_context_message("x", boom) == (None, [])
-
-
-def test_chat_load_config_prefers_shared_llm_block(isolated_release_root):
-    config_path = isolated_release_root / "config" / "grammar_hotkey.config.json"
-    config_path.write_text(
-        json.dumps(
-            {
-                "llm": {
-                    "provider": "ollama",
-                    "base_url": "http://127.0.0.1:11434",
-                    "model": "llama3.2:3b",
-                    "auth_bearer": "ollama",
-                },
-                "chat": {
-                    "llm_model": "stale:old",
-                    "llm_base_url": "http://127.0.0.1:99999",
-                    "llm_auth_bearer": "stale",
-                },
-            }
-        ),
-        encoding="utf-8",
-    )
-    sys.modules.pop("chat_popup", None)
-    sys.modules.pop("paths", None)
-    chat = importlib.import_module("chat_popup")
-    chat._overlay_live_flm_settings = lambda cfg: cfg
-
-    cfg = chat.load_config()
-
-    assert cfg["llm_model"] == "llama3.2:3b"
-    assert cfg["llm_base_url"] == "http://127.0.0.1:11434"
-    assert cfg["llm_auth_bearer"] == "ollama"
-
-
-def test_watch_parent_pid_quits_app_when_parent_dies(isolated_release_root):
-    # The watch must use the kernel wait (no tasklist polling) and fire the
-    # app quit shortly after the parent process exits.
-    import subprocess
-    import time
-    import types
-
-    chat = _import_chat(isolated_release_root)
-    parent = subprocess.Popen([sys.executable, "-c", "import time; time.sleep(30)"])
-    quit_calls: list = []
-    fake_app = types.SimpleNamespace(
-        on_quit=lambda: None,
-        root=types.SimpleNamespace(after=lambda _delay, fn: quit_calls.append(fn)),
-    )
-    try:
-        chat._watch_parent_pid(parent.pid, fake_app)
-        time.sleep(0.4)
-        assert not quit_calls  # parent still alive -> no quit yet
-        parent.kill()
-        parent.wait(timeout=5)
-        deadline = time.time() + 5
-        while time.time() < deadline and not quit_calls:
-            time.sleep(0.1)
-        assert quit_calls, "parent exit was not detected within 5s"
-    finally:
-        if parent.poll() is None:
-            parent.kill()
diff --git a/tests/test_ffp_daemon.py b/tests/test_ffp_daemon.py
index 83670eb..c02789b 100644
--- a/tests/test_ffp_daemon.py
+++ b/tests/test_ffp_daemon.py
@@ -67,20 +67,21 @@ def test_actions_count_and_expected_names(daemon_module):
     # v1.6 web dashboard added recent_history + notes_list + mode_ids -> 51;
     # provider work added provider_status -> 52; model_recommendations -> 53;
     # web chat backend added chat_threads_list/chat_thread_get/chat_send/
-    # chat_thread_delete/chat_stage_selection/chat_take_staged -> 59.
-    assert len(daemon_module.ACTIONS) == 59
+    # chat_thread_delete/chat_stage_selection/chat_take_staged -> 59; retiring the
+    # tkinter popup removed chat_send_selection/chat_reload/chat_restart -> 56.
+    assert len(daemon_module.ACTIONS) == 56
     for a in ("chat_threads_list", "chat_thread_get", "chat_send",
               "chat_thread_delete", "chat_stage_selection", "chat_take_staged"):
         assert a in daemon_module.ACTIONS
+    # popup-era socket actions are gone (chat is daemon-backed now)
+    for a in ("chat_send_selection", "chat_reload", "chat_restart"):
+        assert a not in daemon_module.ACTIONS
     assert "model_recommendations" in daemon_module.ACTIONS
     assert "recent_history" in daemon_module.ACTIONS
     assert "notes_list" in daemon_module.ACTIONS
     assert "mode_ids" in daemon_module.ACTIONS
     assert "version" in daemon_module.ACTIONS
     assert "apply_config_patch" in daemon_module.ACTIONS
-    assert "chat_send_selection" in daemon_module.ACTIONS
-    assert "chat_reload" in daemon_module.ACTIONS
-    assert "chat_restart" in daemon_module.ACTIONS
     assert "open_dashboard" in daemon_module.ACTIONS
     assert "config_snapshot" in daemon_module.ACTIONS
     assert "provider_status" in daemon_module.ACTIONS
@@ -490,17 +491,3 @@ def test_open_dashboard_writes_marker(daemon_server, tmp_path, monkeypatch):
     assert payload["ok"] is True
     assert payload["result"] == "queued"
     assert (tmp_path / ".open_dashboard").read_text(encoding="utf-8") == "1\n"
-
-
-def test_build_chat_ingest_payload_reads_fresh_nonce(daemon_module, tmp_path, monkeypatch):
-    """After chat spawns it writes a new nonce; each send must re-read the file."""
-    monkeypatch.setattr(daemon_module._paths, "DATA_DIR", tmp_path)
-    nonce_file = tmp_path / ".chat_ingest_nonce"
-    nonce_file.write_text("stale-nonce", encoding="utf-8")
-
-    first = json.loads(daemon_module._build_chat_ingest_payload("hello", "notepad.exe").decode())
-    assert first["nonce"] == "stale-nonce"
-
-    nonce_file.write_text("fresh-nonce", encoding="utf-8")
-    second = json.loads(daemon_module._build_chat_ingest_payload("hello", "notepad.exe").decode())
-    assert second["nonce"] == "fresh-nonce"