diff --git a/api/server.py b/api/server.py
index 85f99ea..53a2be1 100755
--- a/api/server.py
+++ b/api/server.py
@@ -26,6 +26,7 @@
 import json
 import os
 import platform
+import re
 import shutil
 import subprocess
 import sys
@@ -45,6 +46,8 @@
 TOOLS_DIR = GATHM_ROOT / "tools"
 GUI_DIR = GATHM_ROOT / "gui"
 AGENT_SCRIPT = GATHM_ROOT / "agent" / "orchestrator.sh"
+PILOT_DIR = GATHM_ROOT / "pilot"
+CHAT_SCRIPT = PILOT_DIR / "chat_once.py"
 DEFAULT_PORT = 8080
 DEFAULT_HOST = "127.0.0.1"
 
@@ -133,6 +136,36 @@ def list_tools() -> list:
     return tools
 
 
+# Filler words (lower-case) that _extract_tool_args() removes from an NL query.
+_NL_FILLER = frozenset({
+    "get", "show", "tell", "me", "what", "is", "are", "the", "a", "an",
+    "give", "find", "look", "up", "lookup", "check", "please", "i", "want",
+    "to", "know", "about", "can", "you", "run", "gathm", "use", "do",
+    "for", "in", "at", "on", "from", "of", "and",
+    # common query openers per domain ("lookup" is already listed above)
+    "weather", "forecast", "temperature", "temp",
+    "dns", "records", "record", "query",
+    "ip", "address",
+    "define", "definition", "meaning", "word",
+    "crypto", "cryptocurrency", "price", "cost", "value",
+    "news", "latest", "current", "today",
+    "whois", "info", "information",
+    "movie", "film", "song", "lyrics",
+})
+
+
+def _extract_tool_args(query: str, tool_name: str) -> list:
+    """Drop NL filler, the tool name and trailing ?!,; marks; return bare args."""
+    filler = _NL_FILLER | {tool_name.lower(), ""}  # "" drops punctuation-only tokens
+    return [w for w in (t.rstrip("?!,;") for t in query.split()) if w.lower() not in filler]
+
+
+_ANSI_RE = re.compile(r'\x1b(?:\[[0-?]*[ -/]*[@-~]|\][^\x07\x1b]*(?:\x07|\x1b\\)|[()][0-9A-Za-z])')
+
+def _strip_ansi(text: str) -> str:  # removes CSI (incl. ESC[?25l), OSC and ESC( / ESC) escapes
+    return _ANSI_RE.sub('', text)
+
+
 def execute_tool(tool_name: str, args: list = None, timeout: int = 120) -> dict:
     """Execute a tool via the agent orchestrator."""
     args = args or []
@@ -153,8 +186,8 @@ def execute_tool(tool_name: str, args: list = None, timeout: int = 120) -> dict:
             "tool": tool_name,
             "status": "success" if result.returncode == 0 else "error",
             "exit_code": result.returncode,
-            "output": result.stdout.strip(),
-            "error": result.stderr.strip() if result.returncode != 0 else "",
+            "output": _strip_ansi(result.stdout.strip()),
+            "error": _strip_ansi(result.stderr.strip()) if result.returncode != 0 else "",
             "duration_ms": duration_ms,
         }
     except subprocess.TimeoutExpired:
@@ -177,6 +210,54 @@ def execute_tool(tool_name: str, args: list = None, timeout: int = 120) -> dict:
         }
 
 
+def _pilot_python() -> str:
+    """Pick the interpreter used to launch the Pilot chat script.
+
+    Prefers a Python inside PILOT_DIR/venv (POSIX bin/ layout first, then
+    the Windows Scripts/ layout); returns sys.executable if none exists.
+    """
+    venv = PILOT_DIR / "venv"
+    layouts = (("bin", "python3"), ("bin", "python"), ("Scripts", "python.exe"))
+    existing = (p for p in (venv / d / exe for d, exe in layouts) if p.exists())
+    # Last resort: this server's own interpreter, which may lack langchain.
+    return str(next(existing, sys.executable))
+
+
+def run_chat_agent(query: str, history: list = None, timeout: int = 180) -> dict:
+    """Run the real Pilot LLM agent for one turn and return its reply.
+
+    Pipes {"query", "history"} as JSON to pilot/chat_once.py under the Pilot
+    venv's Python so this stdlib-only server stays dependency-free. Always
+    returns a dict: the agent's JSON object ({"reply": ...} or {"error": ...})
+    or a local {"error": ...} (script missing, launch failure, timeout, no JSON).
+    """
+    if not CHAT_SCRIPT.exists():
+        return {"error": "chat agent not installed (pilot/chat_once.py missing)"}
+
+    payload = json.dumps({"query": query, "history": history or []})
+    try:
+        result = subprocess.run(
+            [_pilot_python(), str(CHAT_SCRIPT)],
+            input=payload, capture_output=True, text=True,
+            timeout=timeout, cwd=str(PILOT_DIR),  # environment is inherited by default
+        )
+    except subprocess.TimeoutExpired:
+        return {"error": f"agent timed out after {timeout}s"}
+    except Exception as e:  # boundary: any launch failure degrades to the fallback
+        return {"error": str(e)}
+
+    out = (result.stdout or "").strip()
+    try:
+        parsed = json.loads(out)
+    except json.JSONDecodeError:
+        parsed = None
+    if isinstance(parsed, dict):  # callers use `"reply" in result` and result.get()
+        return parsed
+    tail = (result.stderr or "").strip().splitlines()
+    return {"error": "agent returned no parseable response",
+            "detail": tail[-1] if tail else out[:200]}
+
+
 def run_agent_command(command: str, args: str = "") -> dict:
     """Run an agent orchestrator command."""
     cmd = [BASH_CMD, str(AGENT_SCRIPT), command]
@@ -359,14 +440,57 @@ def do_POST(self):
             status = 200 if result["status"] == "success" else 500
             self._send_json(result, status)
 
+        # POST /api/v1/agent/chat
+        # Conversational LLM agent (Pilot: LangGraph + Ollama/Gemini). It
+        # understands the message, decides whether to chat or call a tool,
+        # runs the tool, and writes a natural-language reply. Falls back to
+        # the keyword router if the LLM runtime is unavailable.
+        elif path == "/api/v1/agent/chat":
+            query = body.get("query", "").strip()
+            if not query:
+                self._send_json({"error": "Missing 'query' field"}, 400)
+                return
+
+            history = body.get("history", [])
+            result = run_chat_agent(query, history)
+
+            if "reply" in result:
+                self._send_json(result)
+            else:
+                # LLM agent unavailable — degrade to the keyword router so the
+                # GUI still responds, and tell the client why.
+                fallback = run_agent_command("ask", query)
+                fallback["agent"] = "router-fallback"
+                fallback["chat_error"] = result.get("error", "unknown")
+                self._send_json(fallback)
+
         # POST /api/v1/agent/ask
+        # Matches a tool from the natural-language query, then runs it.
         elif path == "/api/v1/agent/ask":
-            query = body.get("query", "")
+            query = body.get("query", "").strip()
             if not query:
                 self._send_json({"error": "Missing 'query' field"}, 400)
                 return
-            result = run_agent_command("ask", query)
-            self._send_json(result)
+
+            # Allow explicit "gathm run <tool> [args]" passthrough
+            run_prefix = re.match(r'^(?:gathm\s+)?run\s+(\S+)(.*)', query, re.I)
+            if run_prefix:
+                tool_name = run_prefix.group(1)
+                extra     = run_prefix.group(2).strip().split()
+                self._send_json(execute_tool(tool_name, extra))
+                return
+
+            # Route: find the best-matching tool
+            route = run_agent_command("ask", query)
+            tool_name = route.get("matched_tool")
+
+            if tool_name and tool_name != "null":
+                args = _extract_tool_args(query, tool_name)
+                self._send_json(execute_tool(tool_name, args))
+            else:
+                # No tool matched — return the routing result for the UI to
+                # render as a friendly "I can help with…" message
+                self._send_json(route)
 
         # POST /api/v1/agent/plan
         elif path == "/api/v1/agent/plan":
diff --git a/gui/app.js b/gui/app.js
index 29a8454..ea4ecc7 100644
--- a/gui/app.js
+++ b/gui/app.js
@@ -1,64 +1,64 @@
-// Gathm AI — app.js
+// Gathm AI -- app.js
 
 const API_BASE = window.GATHM_API_URL || 'http://127.0.0.1:8080';
 
 lucide.createIcons();
 
-// ── Element refs ───────────────────────────────────────────────
-const aiOrb       = document.getElementById('aiOrb');
-const mainOrb     = document.getElementById('mainOrb');
-const freqBars    = document.getElementById('freqBars');
-const botStatus   = document.getElementById('botStatus');
-const chatArea    = document.getElementById('chatArea');
+// -- Element refs ----------------------------------------------------------
+const aiOrb        = document.getElementById('aiOrb');
+const mainOrb      = document.getElementById('mainOrb');
+const freqBars     = document.getElementById('freqBars');
+const botStatus    = document.getElementById('botStatus');
+const chatArea     = document.getElementById('chatArea');
 const messageInput = document.getElementById('messageInput');
-const sendBtn     = document.getElementById('sendBtn');
-const micBtn      = document.getElementById('micBtn');
+const sendBtn      = document.getElementById('sendBtn');
+const micBtn       = document.getElementById('micBtn');
 
-// ── Orb state ──────────────────────────────────────────────────
+// -- Orb state -------------------------------------------------------------
 function setOrbState(state) {
-    if (aiOrb) aiOrb.className = `ai-orb ${state}`;
+    if (aiOrb) aiOrb.className = 'ai-orb ' + state;
 }
 
-// ── Connectivity ───────────────────────────────────────────────
+// -- Connectivity ----------------------------------------------------------
 let isOnline = false;
 
 async function checkConnectivity() {
-    try {
-        // /ping is instant — /health sweeps every tool and would time out.
-        const res = await fetch(`${API_BASE}/api/v1/ping`, {
-            signal: AbortSignal.timeout(4000),
-        });
-        isOnline = res.ok;
-    } catch {
-        isOnline = false;
+    // Try /ping first (instant). Fall back to /api/v1/tools for older
+    // servers that pre-date the /ping endpoint.
+    isOnline = false;
+    for (const p of ['/api/v1/ping', '/api/v1/tools']) {
+        try {
+            const res = await fetch(API_BASE + p, { signal: AbortSignal.timeout(4000) });
+            if (res.ok) { isOnline = true; break; }
+        } catch (_) { /* try next */ }
     }
-    botStatus.textContent = isOnline ? 'Online · Voice & Text' : 'Offline · API not reachable';
+    botStatus.textContent = isOnline ? 'Online - Voice & Text' : 'Offline - API not reachable';
 }
 
 checkConnectivity();
 setInterval(checkConnectivity, 30000);
 
-// ── Scroll ─────────────────────────────────────────────────────
+// -- Scroll ----------------------------------------------------------------
 function scrollToBottom() {
     chatArea.scrollTop = chatArea.scrollHeight;
 }
 
-// ── Time ───────────────────────────────────────────────────────
+// -- Time ------------------------------------------------------------------
 function formatTime() {
     const d = new Date();
     let h = d.getHours(), m = d.getMinutes();
     const ampm = h >= 12 ? 'PM' : 'AM';
     h = h % 12 || 12;
-    return `${h}:${m < 10 ? '0' + m : m} ${ampm}`;
+    return h + ':' + (m < 10 ? '0' + m : m) + ' ' + ampm;
 }
 
-// ── Messages ───────────────────────────────────────────────────
+// -- Messages --------------------------------------------------------------
 function addMessage(text, sender, cssClass) {
     const wrapper = document.createElement('div');
-    wrapper.className = `message-wrapper ${sender}`;
+    wrapper.className = 'message-wrapper ' + sender;
 
     const msg = document.createElement('div');
-    msg.className = `message ${cssClass || sender + '-text'}`;
+    msg.className = 'message ' + (cssClass || sender + '-text');
 
     const p = document.createElement('p');
     p.textContent = text;
@@ -66,7 +66,7 @@ function addMessage(text, sender, cssClass) {
     wrapper.appendChild(msg);
 
     const time = document.createElement('div');
-    time.className = `message-time ${sender}-time`;
+    time.className = 'message-time ' + sender + '-time';
     time.textContent = formatTime();
 
     chatArea.appendChild(wrapper);
@@ -74,7 +74,7 @@ function addMessage(text, sender, cssClass) {
     scrollToBottom();
 }
 
-// ── Typing indicator ───────────────────────────────────────────
+// -- Typing indicator ------------------------------------------------------
 let typingEl = null;
 
 function showTyping() {
@@ -93,40 +93,32 @@ function showTyping() {
 
 function hideTyping() {
     setOrbState('idle');
-    typingEl?.remove();
-    typingEl = null;
+    if (typingEl) { typingEl.remove(); typingEl = null; }
 }
 
-// ── Render agent reply ─────────────────────────────────────────
-// The /agent/ask endpoint is a tool ROUTER, not a chat LLM. It returns
-// structured JSON; translate it into something human-readable instead of
-// dumping raw JSON into the chat.
+// -- Format API response ---------------------------------------------------
+// The /agent/chat endpoint returns {reply} from the LLM agent. If the agent
+// is unavailable the server falls back to the keyword router, so we still
+// handle those shapes gracefully.
 function formatAgentReply(data) {
-    // A tool was matched to the query
+    if (data.reply) return data.reply;                    // LLM agent answer
+    if (data.status === 'success' && data.output) return data.output;
     if (data.matched_tool && data.matched_tool !== 'null') {
-        const desc   = data.description ? `\n\n${data.description}` : '';
-        const action = data.action ? `\n\n→ ${data.action}` : `\n\n→ Run: gathm run ${data.matched_tool}`;
-        return `I can help with that using the “${data.matched_tool}” tool.${desc}${action}`;
+        return 'I can help with that using the "' + data.matched_tool + '" tool.' +
+               (data.description ? '\n\n' + data.description : '');
     }
-
-    // No tool matched — give a friendly, useful nudge instead of an error blob
     if (data.error && /no matching tool/i.test(data.error)) {
-        return "I'm a tool-running assistant, so I work best with task requests. " +
-               "Try things like:\n" +
-               "  • weather in Tokyo\n" +
-               "  • dns records for github.com\n" +
-               "  • ip info 8.8.8.8\n" +
-               "  • define serendipity\n" +
-               "  • crypto price bitcoin";
+        return "I couldn't find a tool for that. Try: weather in Tokyo, " +
+               "dns github.com, ip info 8.8.8.8, define serendipity.";
     }
-
-    // Any other shape: prefer real output, fall back to the error text
     return data.raw_output || data.output || data.result || data.error
         || JSON.stringify(data, null, 2);
 }
 
-// ── Send via API ───────────────────────────────────────────────
+// -- Send via API ----------------------------------------------------------
 let isSending = false;
+let history = [];                 // conversation memory for multi-turn context
+const HISTORY_MAX = 12;           // keep the last N turns
 
 async function sendMessage() {
     const text = messageInput.value.trim();
@@ -139,29 +131,36 @@ async function sendMessage() {
     showTyping();
 
     try {
-        const res = await fetch(`${API_BASE}/api/v1/agent/ask`, {
+        const res = await fetch(API_BASE + '/api/v1/agent/chat', {
             method: 'POST',
             headers: { 'Content-Type': 'application/json' },
-            body: JSON.stringify({ query: text }),
+            body: JSON.stringify({ query: text, history: history }),
         });
 
         hideTyping();
 
         if (!res.ok) {
-            const err = await res.json().catch(() => ({}));
-            addMessage(err.error || `Server error (${res.status})`, 'bot', 'bot-error');
+            const err = await res.json().catch(function() { return {}; });
+            addMessage(err.error || 'Server error (' + res.status + ')', 'bot', 'bot-error');
             return;
         }
 
         const data = await res.json();
-        addMessage(formatAgentReply(data), 'bot');
+        const reply = formatAgentReply(data);
+        addMessage(reply, 'bot');
+
+        // Remember this turn so follow-ups have context
+        history.push({ role: 'user', content: text });
+        history.push({ role: 'assistant', content: reply });
+        if (history.length > HISTORY_MAX * 2) {
+            history = history.slice(-HISTORY_MAX * 2);
+        }
 
     } catch (err) {
         hideTyping();
         addMessage(
-            isOnline
-                ? `Connection error: ${err.message}`
-                : 'Cannot reach Gathm API. Start the server: gathm-api --port 8080',
+            isOnline ? 'Connection error: ' + err.message
+                     : 'Cannot reach Gathm API. Start the server: gathm-api --port 8080',
             'bot', 'bot-error'
         );
     } finally {
@@ -172,11 +171,13 @@ async function sendMessage() {
 }
 
 sendBtn.addEventListener('click', sendMessage);
-messageInput.addEventListener('keypress', e => { if (e.key === 'Enter') sendMessage(); });
+messageInput.addEventListener('keypress', function(e) {
+    if (e.key === 'Enter') sendMessage();
+});
 
-// ══════════════════════════════════════════════════════════════
-// Voice mode — Web Audio API drives real frequency visualization
-// ══════════════════════════════════════════════════════════════
+// =========================================================================
+// Voice mode -- Web Audio API drives real frequency visualization
+// =========================================================================
 
 let audioCtx    = null;
 let analyser    = null;
@@ -197,7 +198,7 @@ async function startVoice() {
 
     audioCtx = new (window.AudioContext || window.webkitAudioContext)();
     analyser = audioCtx.createAnalyser();
-    analyser.fftSize = 64;            // 32 frequency bins
+    analyser.fftSize = 64;
     analyser.smoothingTimeConstant = 0.75;
 
     const src = audioCtx.createMediaStreamSource(micStream);
@@ -207,7 +208,7 @@ async function startVoice() {
     aiOrb.setAttribute('data-live', 'true');
     setOrbState('speaking');
     micBtn.classList.add('active');
-    botStatus.textContent = 'Listening…';
+    botStatus.textContent = 'Listening...';
 
     driveFrequency();
 }
@@ -215,13 +216,12 @@ async function startVoice() {
 function stopVoice() {
     voiceActive = false;
     if (rafId) cancelAnimationFrame(rafId);
-    micStream?.getTracks().forEach(t => t.stop());
-    audioCtx?.close();
+    if (micStream) micStream.getTracks().forEach(function(t) { t.stop(); });
+    if (audioCtx) audioCtx.close();
     audioCtx = null; analyser = null; micStream = null; rafId = null;
 
-    // Reset live transforms
     mainOrb.style.transform = '';
-    bars.forEach(b => { b.style.height = ''; });
+    bars.forEach(function(b) { b.style.height = ''; });
 
     aiOrb.removeAttribute('data-live');
     setOrbState('idle');
@@ -232,26 +232,24 @@ function stopVoice() {
 function driveFrequency() {
     if (!voiceActive || !analyser) return;
 
-    const data = new Uint8Array(analyser.frequencyBinCount); // 32 values
+    const data = new Uint8Array(analyser.frequencyBinCount);
     analyser.getByteFrequencyData(data);
 
-    // Overall energy → orb scale (1.0 – 1.18)
-    const avg = data.reduce((s, v) => s + v, 0) / data.length;
+    const avg = data.reduce(function(s, v) { return s + v; }, 0) / data.length;
     const scale = 1 + (avg / 255) * 0.18;
-    mainOrb.style.transform = `scale(${scale.toFixed(4)})`;
+    mainOrb.style.transform = 'scale(' + scale.toFixed(4) + ')';
 
-    // Per-band energy → bar heights (4 px – 38 px)
     const step = Math.max(1, Math.floor(data.length / bars.length));
-    bars.forEach((bar, i) => {
-        const val = data[i * step] ?? 0;
+    bars.forEach(function(bar, i) {
+        const val = data[i * step] || 0;
         const h = 4 + (val / 255) * 34;
-        bar.style.height = `${h.toFixed(1)}px`;
+        bar.style.height = h.toFixed(1) + 'px';
     });
 
     rafId = requestAnimationFrame(driveFrequency);
 }
 
-micBtn.addEventListener('click', () => {
+micBtn.addEventListener('click', function() {
     if (voiceActive) stopVoice();
     else startVoice();
 });
diff --git a/pilot/chat_once.py b/pilot/chat_once.py
new file mode 100755
index 0000000..5e3db31
--- /dev/null
+++ b/pilot/chat_once.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""
+Single-shot, non-interactive entry point to the Pilot LLM agent.
+
+Used by the API server (POST /api/v1/agent/chat) so the GUI can talk to the
+real LangGraph + Ollama/Gemini agent instead of the bash keyword router.
+
+Protocol:
+  stdin  : JSON {"query": "...", "history": [{"role": "user"|"assistant",
+                                              "content": "..."}]}
+  stdout : JSON {"reply": "...", "backend": "...", "model": "..."}
+           or  {"error": "..."} on failure (exit code != 0)
+
+All of the agent's human/TUI output is redirected to stderr so stdout stays
+clean JSON for the caller to parse.
+"""
+
+import json
+import os
+import sys
+
+# The agent (pilot/main.py) and its TUI print to stdout during reasoning.
+# Redirect stdout to stderr for the whole run; we restore the real stdout
+# only to emit the final JSON line.
+_REAL_STDOUT = sys.stdout
+sys.stdout = sys.stderr
+
+# Make `import main` work regardless of the caller's CWD.
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+
+
+def _emit(obj: dict, code: int = 0) -> int:
+    """Restore the real stdout, print *obj* as one JSON line, return *code*."""
+    sys.stdout = _REAL_STDOUT
+    _REAL_STDOUT.write(json.dumps(obj) + "\n")
+    _REAL_STDOUT.flush()
+    return code
+
+
+def _read_request() -> dict:
+    """Return the request as a dict, read from stdin JSON, stdin text, or argv."""
+    raw = ""
+    try:
+        if not sys.stdin.isatty():
+            raw = sys.stdin.read().strip()
+    except Exception:  # noqa: BLE001 — best-effort: unreadable stdin falls back to argv
+        raw = ""
+    if raw:
+        try:
+            req = json.loads(raw)
+        except json.JSONDecodeError:
+            req = None
+        # Non-object JSON ("hi", 42, [..]) has no .get(); treat it as plain text.
+        return req if isinstance(req, dict) else {"query": raw}
+    # Fallback: query as command-line args
+    return {"query": " ".join(sys.argv[1:])} if len(sys.argv) > 1 else {}
+
+
+def main() -> int:
+    """Handle one chat request; emit JSON; exit 0 ok, 1 no query, 2 no agent, 3 agent error."""
+    req = _read_request()
+    raw_query = req.get("query")
+    query = raw_query.strip() if isinstance(raw_query, str) else ""
+    if not query:
+        return _emit({"error": "empty query"}, 1)
+
+    # Import the agent; needs langchain/langgraph, i.e. the pilot venv's python.
+    # main.py may raise SystemExit on missing deps, hence the BaseException net.
+    try:
+        import main as pilot  # pilot/main.py
+    except SystemExit as exc:
+        return _emit({"error": f"agent dependencies missing (code {exc.code})"}, 2)
+    except BaseException as exc:  # noqa: BLE001 — report any import failure cleanly
+        return _emit({"error": f"agent unavailable: {exc}"}, 2)
+
+    if not getattr(pilot, "LANGCHAIN_AVAILABLE", False) or getattr(pilot, "app", None) is None:
+        return _emit({"error": "LLM runtime not available (langchain/langgraph not installed)"}, 2)
+
+    from langchain_core.messages import HumanMessage, AIMessage
+
+    # Rebuild prior turns; history is client-supplied, so skip malformed entries.
+    roles = {"user": HumanMessage, "assistant": AIMessage, "ai": AIMessage, "bot": AIMessage}
+    turns = req.get("history")
+    history = []
+    for turn in turns if isinstance(turns, list) else []:
+        if not isinstance(turn, dict):
+            continue
+        role, content = turn.get("role"), turn.get("content")
+        make = roles.get(role.lower()) if isinstance(role, str) else None
+        if make is not None and content:
+            history.append(make(content=content))
+
+    state = {"messages": history + [HumanMessage(content=query)]}
+    reply = None
+    try:
+        for output in pilot.app.stream(state, config={"recursion_limit": 25}):
+            for key, value in output.items():
+                if key == "agent" and value.get("next_step") == "end":
+                    reply = value["messages"][-1].content
+    except Exception as exc:  # noqa: BLE001
+        return _emit({"error": f"agent error: {exc}"}, 3)
+
+    return _emit({
+        "reply": reply or "(no response)",
+        "backend": getattr(pilot, "LLM_BACKEND", "unknown"),
+        "model": getattr(pilot, "OLLAMA_MODEL", "unknown"),
+    })
+
+
+if __name__ == "__main__":
+    try:
+        sys.exit(main())  # main() returns the process exit code
+    except KeyboardInterrupt:
+        sys.exit(130)  # presumably the shell convention 128 + SIGINT(2)
+    except BrokenPipeError:
+        sys.exit(0)  # writing to a closed pipe: exit quietly instead of a traceback