diff --git a/SECURITY.md b/SECURITY.md
index 307548b93..bd74691aa 100644
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -402,6 +402,34 @@ http://localhost:*`. The nonce is generated in the proxy, exposed
    the BYOK provider; key material never returns to renderer. Closes
    the exfil path even if all four layers above were bypassed.
 
+**Endpoint providers (local LLMs, #806).** The agent host additionally
+serves `/providers/endpoints/*` — CRUD over user-configured
+OpenAI-compatible endpoints (Ollama preset, self-hosted gateways),
+persisted at `${userData}/endpoints.json`. The split that keeps layer 5
+intact: an endpoint **config** (base URL + registered model list) is
+plain readable config the renderer may list back, while an endpoint's
+optional **API key** rides the `/secrets/*` surface under the endpoint's
+id (the secrets-route allowlist admits configured endpoint ids) and is
+never readable. The config validator
+(`packages/grida-ai-agent/src/protocol/endpoints.ts`) pins the shape —
+http(s) URL, bounded sizes, unknown fields dropped — so a config write
+cannot smuggle credentials or blobs into the readable store. The
+`base_url` is user-owned egress by design (the desktop user points their
+own agent at their own endpoint — same trust model as BYOK), and the
+routes sit behind the same CORS/Referer/Basic-Auth stack as everything
+else. The `/providers/endpoints/probe` route makes the host GET a
+user-supplied URL's model listing (the renderer's grida.co origin cannot
+reach a local Ollama itself) — the same egress a configured run already
+performs; responses are parsed and reduced to
+`{id, tool_call, contextWindow}` rows with bounded reads (timeout + size
+cap), never proxied raw. On sandboxed
+platforms the srt network policy additionally bounds all of this
+structurally: outbound to **localhost** is permitted via the
+`allowLocalBinding` local-ip rule (how the user's own `ollama serve` is
+reached), while a config pointing at an arbitrary **remote** host is
+blocked unless that host is in the enumerated `allowed_domains` — a
+hostile config cannot turn the sidecar into an open exfil channel.
+
 **Electron-side hardening (mandatory; see the
 [Electron security checklist](https://www.electronjs.org/docs/latest/tutorial/security)).**
 `contextIsolation: true`, `nodeIntegration: false`, `sandbox: true`,
diff --git a/desktop/src/preload.ts b/desktop/src/preload.ts
index 45f99b487..cc2dab164 100644
--- a/desktop/src/preload.ts
+++ b/desktop/src/preload.ts
@@ -445,6 +445,18 @@ const bridge: DesktopBridge = {
     },
   },
 
+  providers: {
+    list_endpoints: () => agentClient.providers.list_endpoints(),
+    set_endpoint: async (config) => {
+      await agentClient.providers.set_endpoint(config);
+    },
+    delete_endpoint: async (id) => {
+      await agentClient.providers.delete_endpoint(id);
+    },
+    info: () => agentClient.providers.info(),
+    probe_endpoint: (baseUrl) => agentClient.providers.probe_endpoint(baseUrl),
+  },
+
   agent: {
     run: (opts, onChunk) =>
       // Fresh runs always return a stream (only `reconnect` may return
diff --git a/docs/editor/desktop/_category_.json b/docs/editor/desktop/_category_.json
new file mode 100644
index 000000000..48c425ee7
--- /dev/null
+++ b/docs/editor/desktop/_category_.json
@@ -0,0 +1,8 @@
+{
+  "label": "Desktop",
+  "link": {
+    "type": "generated-index",
+    "title": "Grida Desktop",
+    "description": "Guides for the Grida Desktop app."
+  }
+}
diff --git a/docs/editor/desktop/img/local-models-configured.webp b/docs/editor/desktop/img/local-models-configured.webp
new file mode 100644
index 000000000..141f2e3fb
Binary files /dev/null and b/docs/editor/desktop/img/local-models-configured.webp differ
diff --git a/docs/editor/desktop/local-models.md b/docs/editor/desktop/local-models.md
new file mode 100644
index 000000000..a8650e1c3
--- /dev/null
+++ b/docs/editor/desktop/local-models.md
@@ -0,0 +1,127 @@
+---
+title: Local Models (Ollama)
+description: Run the Grida Desktop agent on AI models that live on your own machine — no account, no API key.
+keywords:
+  - ollama
+  - local llm
+  - local ai
+  - byok
+  - grida desktop
+  - ai agent
+format: md
+doc_tasks:
+  - update
+---
+
+# Local Models (Ollama)
+
+Grida Desktop's AI agent can run on models that live entirely on your own
+machine, served by [Ollama](https://ollama.com). There is no account to
+create and no API key to paste — your prompts, files, and the model's
+responses never leave your computer.
+
+You can use local models alongside provider keys (OpenRouter, Vercel), or
+as your only setup.
+
+## Requirements
+
+- **Grida Desktop** installed.
+- **Ollama** installed and running (`ollama serve` — the desktop Ollama app
+  runs it for you).
+- At least one model pulled, for example:
+
+  ```sh
+  ollama pull gpt-oss:20b
+  ```
+
+A note on expectations: local models vary widely in how well they drive
+the agent. The agent leans on tool calling (reading and writing files,
+running commands, planning), and small models often handle this poorly.
+Models in the ~30B class and up are recommended for agent tasks.
+
+## Set up Ollama
+
+Open **Settings** from the app menu, find the **Local Models** card, and
+click **Set up Ollama**. The base URL is prefilled with Ollama's local
+address (`http://localhost:11434/v1`), and the models you have pulled are
+detected automatically.
+
+![The Local Models card after setup, with an auto-detected model and its context window and tool-support badges](./img/local-models-configured.webp)
+
+Review the list and click **Save**:
+
+- Each detected model shows its **context window** and **tool-calling**
+  support as read-only badges. These come from the endpoint itself and
+  refresh whenever you open Settings (and on **Detect**, useful after you
+  `ollama pull` a new model). For a model that is currently loaded, the
+  context window is the size your server actually allocated; otherwise it
+  is the model's maximum.
+- A model you add manually by id (for example on a gateway that doesn't
+  report capabilities) keeps editable fields instead — there, you are
+  the data source. Manually added models default to a conservative
+  `8192` context.
+
+The first model in the list is the default — background work like session
+titles and summaries also runs on it.
+
+## Use a local model
+
+Registered models appear in the model picker in every agent composer,
+grouped under the endpoint name (for example `gpt-oss:20b · Ollama`).
+Pick one and chat as usual. Everything the agent does — reading your
+workspace files, making edits, planning — runs against the local model.
+Each session remembers the model it ran with.
+
+If you have no provider key configured at all, the agent uses your Ollama
+setup automatically.
+
+## Models without tool support
+
+The agent works through tool calls, so a model that cannot make them
+loses most of its abilities. Tool support is detected per model — Ollama
+reports it, and `ollama show <model>` lists `tools` when a model supports
+tool calling. When you select a model without tool support, the composer
+shows a warning, but you can still chat with it.
+
+## Troubleshooting
+
+- **The model errors immediately.** Check that Ollama is running: open
+  `http://localhost:11434` in a browser — it should answer
+  `Ollama is running`.
+- **A model is missing from the picker.** Only registered models appear.
+  Click **Detect** in **Settings → Local Models** after pulling a new
+  model, or add its id manually.
+- **Long sessions stop or degrade.** The detected context window may be
+  larger than what your serving configuration actually allows (it
+  converges to the served size once the model has been loaded). To pin a
+  smaller value, set an override in the config file — see below.
+- **Slow responses.** Local speed is your hardware's speed. Smaller
+  models respond faster but handle agent tasks worse.
+
+## Other OpenAI-compatible endpoints
+
+The base URL accepts any OpenAI-compatible server on your machine, so a
+local gateway such as LiteLLM or vLLM works the same way: point the base
+URL at it and register the models it serves. If the gateway needs an API
+key, save it in the card's **API key** field (it appears once the
+endpoint is saved) — the key is stored by the agent host and never shown
+back. Ollama itself needs no key.
+
+## Advanced: the config file
+
+Everything on this page is stored as plain JSON in `endpoints.json` (the
+settings card links to it). Detected values refresh automatically, so
+hand-edits to them won't stick — if an endpoint reports a value that is
+wrong for your setup (for example, your server caps context below the
+model's maximum), pin the correction in the model's `overrides` instead.
+Overrides always win over detected values, and detection never touches
+them:
+
+```json
+{
+  "id": "gemma4:31b-mlx",
+  "tool_call": true,
+  "contextWindow": 262144,
+  "overrides": { "contextWindow": 32768 }
+}
+```
diff --git a/editor/app/desktop/settings/page.tsx b/editor/app/desktop/settings/page.tsx
index 2e40b08a7..7e24b3f03 100644
--- a/editor/app/desktop/settings/page.tsx
+++ b/editor/app/desktop/settings/page.tsx
@@ -1,10 +1,11 @@
 "use client";
 
-import { useCallback, useEffect, useState } from "react";
-import { Loader2 } from "lucide-react";
+import { useCallback, useEffect, useRef, useState } from "react";
+import { Loader2, Trash2 } from "lucide-react";
 import { Button } from "@app/ui/components/button";
 import { Input } from "@app/ui/components/input";
 import { Label } from "@app/ui/components/label";
+import { Switch } from "@app/ui/components/switch";
 import {
   Card,
   CardContent,
@@ -16,9 +17,15 @@ import { Skeleton } from "@app/ui/components/skeleton";
 import {
   BYOK_PROVIDER_LABELS,
   DesktopBridgeMissingError,
+  OLLAMA_ENDPOINT_PRESET,
   app,
+  mergeProbedModels,
+  providers,
+  resolveEndpointModel,
   secrets,
   type ByokProviderId,
+  type EndpointModelEntry,
+  type EndpointProviderConfig,
 } from "@/lib/desktop/bridge";
 import {
   DesktopPageContent,
@@ -47,11 +54,12 @@ export default function DesktopSettingsPage() {
         <header>
           <h1 className="text-2xl font-semibold tracking-tight">Settings</h1>
           <p className="mt-1 text-sm text-muted-foreground">
-            AI provider keys and app info.
+            AI provider keys, local models, and app info.
           </p>
         </header>
 
         <ByokSection />
+        <LocalModelsSection />
         <AboutSection />
       </DesktopPageContent>
     </DesktopPageShell>
@@ -277,6 +285,615 @@ function StatusPill({ kind }: { kind: "loading" | "empty" | "configured" }) {
   );
 }
 
+/* ─────────────────────────── Local models ───────────────────────── */
+
+/**
+ * Endpoint provider config (issue #806) — the Ollama preset slot. The
+ * agent host persists configs in `endpoints.json` (plain config, not a
+ * secret; the bridge may read them back, unlike keys).
+ *
+ * The section edits a local draft and persists on Save — endpoint config
+ * is structural (base URL + model list), so field-level autosave would
+ * fire half-formed configs at the host validator.
+ */
+
+type LocalState =
+  | { kind: "loading" }
+  | { kind: "unsupported" }
+  | { kind: "ready"; draft: EndpointProviderConfig | null; dirty: boolean }
+  | { kind: "saving"; draft: EndpointProviderConfig | null }
+  | { kind: "error"; message: string; draft: EndpointProviderConfig | null };
+
+function LocalModelsSection() {
+  const [state, setState] = useState<LocalState>({ kind: "loading" });
+  const [newModelId, setNewModelId] = useState("");
+  const [probing, setProbing] = useState(false);
+  const [probeNote, setProbeNote] = useState<string | null>(null);
+  // Whether the endpoint config exists on the host — the API-key slot is
+  // only rendered then (the secrets allowlist accepts CONFIGURED endpoint
+  // ids; a key for an unsaved draft would 400).
+  const [persisted, setPersisted] = useState(false);
+  // Stale-write guard: detection runs async off a SNAPSHOT of the config
+  // while the form stays editable. Any user action that changes what the
+  // draft means (edit, save, remove, re-setup) bumps this; a completion
+  // holding an older number drops its write instead of resurrecting a
+  // deleted endpoint or wiping newer unsaved edits.
+  const opVersion = useRef(0);
+
+  /**
+   * Discover the endpoint's models (agent-host-side fetch of Ollama's
+   * `/api/tags` + `/api/ps`/`/api/show`, or a generic `/models`) and
+   * refresh the DETECTED fields. Detection owns the top-level
+   * `tool_call`/`contextWindow` on each entry — the probe overwrites
+   * them freely; human corrections live in `overrides` (hand-edited
+   * JSON, or the inputs shown when detection has nothing) and are never
+   * touched here.
+   *
+   * `persist: true` (an already-saved config) writes the refreshed
+   * config straight back — detected facts aren't a user choice, so they
+   * don't sit in an unsaved draft. The setup flow passes `false` and
+   * keeps the explicit Save.
+   */
+  const detectInto = useCallback(
+    async (base: EndpointProviderConfig, opts: { persist: boolean }) => {
+      const version = opVersion.current;
+      setProbing(true);
+      setProbeNote(null);
+      try {
+        const result = await providers.probeEndpoint(base.base_url);
+        // `base` is stale once the user edited/saved/removed mid-probe —
+        // applying it would undo their action. Drop the result silently.
+        if (opVersion.current !== version) return;
+        const merged = mergeProbedModels(base.models, result.models);
+        setProbeNote(
+          merged.discovered > 0
+            ? `Found ${merged.discovered} model${merged.discovered === 1 ? "" : "s"}.`
+            : merged.updated > 0
+              ? "Updated model details."
+              : "No new models found."
+        );
+        if (merged.discovered === 0 && merged.updated === 0) return;
+        const next = { ...base, models: merged.models };
+        if (opts.persist) {
+          await providers.setEndpoint(next);
+          if (opVersion.current !== version) return;
+          setState({ kind: "ready", draft: next, dirty: false });
+        } else {
+          setState({ kind: "ready", draft: next, dirty: true });
+        }
+      } catch (err) {
+        if (opVersion.current !== version) return;
+        setProbeNote(
+          `Couldn't reach the endpoint (${describeError(err)}) — add models manually.`
+        );
+      } finally {
+        setProbing(false);
+      }
+    },
+    []
+  );
+
+  const refresh = useCallback(async () => {
+    if (!providers.isSupported()) {
+      setState({ kind: "unsupported" });
+      return;
+    }
+    const version = ++opVersion.current;
+    try {
+      const list = await providers.listEndpoints();
+      const ollama = list.find((e) => e.id === OLLAMA_ENDPOINT_PRESET.id);
+      if (opVersion.current !== version) return;
+      setState({ kind: "ready", draft: ollama ?? null, dirty: false });
+      setPersisted(ollama != null);
+      // Detected values converge to the server's truth on every visit —
+      // notably /api/ps starts reporting a model's REAL allocation once
+      // it has been loaded. Fire-and-forget; failures only leave a note.
+      if (ollama) void detectInto(ollama, { persist: true });
+    } catch (err) {
+      if (opVersion.current !== version) return;
+      setState({ kind: "error", message: describeError(err), draft: null });
+    }
+  }, [detectInto]);
+
+  useEffect(() => {
+    void refresh();
+  }, [refresh]);
+
+  const draft = "draft" in state ? state.draft : null;
+
+  const edit = useCallback((next: EndpointProviderConfig) => {
+    opVersion.current += 1;
+    setState({ kind: "ready", draft: next, dirty: true });
+  }, []);
+
+  const handleSave = useCallback(async () => {
+    if (!draft) return;
+    const version = ++opVersion.current;
+    setState({ kind: "saving", draft });
+    try {
+      await providers.setEndpoint(draft);
+      const list = await providers.listEndpoints();
+      const saved = list.find((e) => e.id === OLLAMA_ENDPOINT_PRESET.id);
+      setPersisted(saved != null);
+      // An edit made while the save was in flight wins over the read-back.
+      if (opVersion.current !== version) return;
+      setState({ kind: "ready", draft: saved ?? null, dirty: false });
+    } catch (err) {
+      if (opVersion.current !== version) return;
+      setState({ kind: "error", message: describeError(err), draft });
+    }
+  }, [draft]);
+
+  const handleEnable = useCallback(() => {
+    opVersion.current += 1;
+    const base: EndpointProviderConfig = {
+      ...OLLAMA_ENDPOINT_PRESET,
+      models: [],
+    };
+    setState({ kind: "ready", draft: base, dirty: true });
+    // Prefill from the running Ollama right away — the common path is
+    // "models already pulled; nothing to type". Not persisted until the
+    // user confirms with Save (the config doesn't exist yet).
+    void detectInto(base, { persist: false });
+  }, [detectInto]);
+
+  const handleRemove = useCallback(async () => {
+    if (!draft) return;
+    // Bump FIRST: an in-flight detection completing after this click must
+    // not persist its snapshot back and resurrect the deleted endpoint.
+    opVersion.current += 1;
+    let confirmed = false;
+    try {
+      confirmed = await providers.confirmDeleteEndpoint(
+        draft.label ?? draft.id
+      );
+    } catch (err) {
+      setState({ kind: "error", message: describeError(err), draft });
+      return;
+    }
+    if (!confirmed) return;
+    setState({ kind: "saving", draft });
+    try {
+      await providers.deleteEndpoint(draft.id);
+      await refresh();
+    } catch (err) {
+      setState({ kind: "error", message: describeError(err), draft });
+    }
+  }, [draft, refresh]);
+
+  const addModel = useCallback(() => {
+    const id = newModelId.trim();
+    // Skip when there is no draft, the id is blank, or it is already listed.
+    if (!draft || !id || draft.models.some((m) => m.id === id)) return;
+    edit({ ...draft, models: draft.models.concat({ id }) });
+    setNewModelId("");
+  }, [draft, newModelId, edit]);
+
+  // Save is enabled only for a settled ("ready"), dirty draft whose base
+  // URL is not blank.
+  const saveDisabled = !(
+    state.kind === "ready" && state.dirty && draft?.base_url.trim()
+  );
+
+  // Old desktop binaries have no bridge surface for this — hide rather
+  // than render a dead section.
+  if (state.kind === "unsupported") return null;
+
+  return (
+    <Card>
+      <CardHeader>
+        <CardTitle>Local Models</CardTitle>
+        <CardDescription>
+          Run the agent on your own machine with{" "}
+          <a
+            className="underline underline-offset-4"
+            href="https://ollama.com"
+            target="_blank"
+            rel="noreferrer"
+          >
+            Ollama
+          </a>{" "}
+          — no account, no API key. Start <code>ollama serve</code> and pull a
+          model; Grida detects it automatically. Local models vary widely in
+          agent ability; larger models (~30B+) are recommended for agent tasks.
+        </CardDescription>
+      </CardHeader>
+      <CardContent className="flex flex-col gap-4">
+        {state.kind === "loading" ? (
+          <Skeleton className="h-9 w-full" />
+        ) : !draft ? (
+          <div className="flex justify-start">
+            <Button variant="outline" onClick={handleEnable}>
+              Set up Ollama
+            </Button>
+          </div>
+        ) : (
+          <>
+            <div className="flex flex-col gap-2">
+              <Label className="text-sm font-medium">Base URL</Label>
+              <Input
+                value={draft.base_url}
+                onChange={(e) => edit({ ...draft, base_url: e.target.value })}
+                placeholder={OLLAMA_ENDPOINT_PRESET.base_url}
+                autoComplete="off"
+                spellCheck={false}
+              />
+            </div>
+
+            <div className="flex flex-col gap-2">
+              <div className="flex items-center justify-between">
+                <Label className="text-sm font-medium">Models</Label>
+                <Button
+                  variant="outline"
+                  size="sm"
+                  disabled={probing || state.kind === "saving"}
+                  onClick={() => void detectInto(draft, { persist: false })}
+                >
+                  {probing ? <Loader2 className="size-4 animate-spin" /> : null}
+                  Detect
+                </Button>
+              </div>
+              {probeNote && (
+                <p className="text-xs text-muted-foreground" role="status">
+                  {probeNote}
+                </p>
+              )}
+              {draft.models.length === 0 && !probing && (
+                <p className="text-xs text-muted-foreground">
+                  Models you pulled in Ollama are detected automatically — or
+                  add one by id (e.g. <code>llama3.1:8b</code>). The first model
+                  is the default.
+                </p>
+              )}
+              {draft.models.map((model, index) => (
+                <LocalModelRow
+                  key={model.id}
+                  model={model}
+                  onChange={(next) =>
+                    edit({
+                      ...draft,
+                      models: draft.models.map((m, i) =>
+                        i === index ? next : m
+                      ),
+                    })
+                  }
+                  onRemove={() =>
+                    edit({
+                      ...draft,
+                      models: draft.models.filter((_, i) => i !== index),
+                      default_model_id:
+                        draft.default_model_id === model.id
+                          ? undefined
+                          : draft.default_model_id,
+                    })
+                  }
+                />
+              ))}
+              <div className="flex gap-2">
+                <Input
+                  value={newModelId}
+                  onChange={(e) => setNewModelId(e.target.value)}
+                  onKeyDown={(e) => {
+                    if (e.key === "Enter") {
+                      e.preventDefault();
+                      addModel();
+                    }
+                  }}
+                  placeholder="model id, e.g. llama3.1:8b"
+                  autoComplete="off"
+                  spellCheck={false}
+                />
+                <Button
+                  variant="outline"
+                  disabled={newModelId.trim().length === 0}
+                  onClick={addModel}
+                >
+                  Add
+                </Button>
+              </div>
+            </div>
+
+            {persisted && (
+              <EndpointKeyRow
+                endpointId={draft.id}
+                label={draft.label ?? draft.id}
+              />
+            )}
+
+            <div className="flex items-center justify-between">
+              <Button
+                variant="ghost"
+                size="sm"
+                className="text-destructive hover:text-destructive"
+                disabled={state.kind === "saving"}
+                onClick={() => void handleRemove()}
+              >
+                Remove
+              </Button>
+              <Button
+                size="default"
+                disabled={saveDisabled}
+                onClick={() => void handleSave()}
+              >
+                {state.kind === "saving" ? (
+                  <Loader2 className="size-4 animate-spin" />
+                ) : null}
+                Save
+              </Button>
+            </div>
+          </>
+        )}
+
+        {state.kind === "error" && (
+          <button
+            type="button"
+            role="alert"
+            aria-live="polite"
+            onClick={() => void refresh()}
+            className="self-start text-left text-sm text-destructive underline-offset-4 hover:underline"
+          >
+            {state.message} (click to retry)
+          </button>
+        )}
+
+        {draft && providers.canRevealConfigFile() && (
+          <p className="text-xs text-muted-foreground">
+            Stored as plain JSON — detected values refresh automatically; to pin
+            a value the endpoint reports wrong, set <code>overrides</code> in{" "}
+            <button
+              type="button"
+              className="underline underline-offset-4 hover:text-foreground"
+              onClick={() => void providers.revealConfigFile()}
+            >
+              endpoints.json
+            </button>
+            .
+          </p>
+        )}
+      </CardContent>
+    </Card>
+  );
+}
+
+type EndpointKeyState =
+  | { kind: "loading" | "empty" | "configured" | "saving" | "removing" }
+  | { kind: "error"; message: string };
+
+/**
+ * Optional API key for a configured endpoint (issue #806). Ollama needs
+ * none; a keyed self-hosted gateway stores its key HERE — through the
+ * same write/presence/delete-only `secrets` surface as BYOK keys, under
+ * the ENDPOINT's id (GRIDA-SEC-004: never inside the endpoint config,
+ * never readable back). Rendered only for a persisted endpoint, since
+ * the secrets allowlist accepts configured endpoint ids only.
+ */
+function EndpointKeyRow({
+  endpointId,
+  label,
+}: {
+  endpointId: string;
+  label: string;
+}) {
+  const [state, setState] = useState<EndpointKeyState>({ kind: "loading" });
+  const [value, setValue] = useState("");
+
+  // Presence check only — the secrets surface never returns the key itself.
+  const refresh = useCallback(async () => {
+    try {
+      const present = await secrets.hasKey(endpointId);
+      setState({ kind: present ? "configured" : "empty" });
+    } catch (err) {
+      setState({ kind: "error", message: describeError(err) });
+    }
+  }, [endpointId]);
+
+  useEffect(() => {
+    void refresh();
+  }, [refresh]);
+
+  const handleSaveKey = useCallback(async () => {
+    setState({ kind: "saving" });
+    try {
+      await secrets.setKey(endpointId, value.trim()); // match the Save gate
+      setValue("");
+      await refresh();
+    } catch (err) {
+      setValue("");
+      setState({ kind: "error", message: describeError(err) });
+    }
+  }, [endpointId, value, refresh]);
+
+  // Delete flow: ask for confirmation, delete the key on the host, then
+  // re-check presence. A throw from the confirmation or from the delete
+  // lands in the same error state, so a single try/catch covers the whole
+  // sequence.
+  const handleRemoveKey = useCallback(async () => {
+    try {
+      const approved = await secrets.confirmDeleteKey(endpointId, label);
+      // Declined: leave the stored key and the current state untouched.
+      if (!approved) return;
+      setState({ kind: "removing" });
+      await secrets.deleteKey(endpointId);
+      // `refresh` catches its own failures and sets the final state.
+      await refresh();
+    } catch (err) {
+      setState({ kind: "error", message: describeError(err) });
+    }
+  }, [endpointId, label, refresh]);
+
+  return (
+    <div className="flex flex-col gap-2">
+      <div className="flex items-center justify-between">
+        <Label className="text-sm font-medium">API key</Label>
+        {(state.kind === "configured" || state.kind === "removing") && (
+          <Button
+            variant="outline"
+            size="sm"
+            disabled={state.kind === "removing"}
+            onClick={() => void handleRemoveKey()}
+          >
+            {state.kind === "removing" ? (
+              <Loader2 className="size-4 animate-spin" />
+            ) : null}
+            Remove key
+          </Button>
+        )}
+      </div>
+
+      {state.kind === "loading" ? (
+        <Skeleton className="h-9 w-full" />
+      ) : state.kind === "error" ? (
+        <button
+          type="button"
+          role="alert"
+          aria-live="polite"
+          onClick={() => void refresh()}
+          className="self-start text-left text-sm text-destructive underline-offset-4 hover:underline"
+        >
+          {state.message} (click to retry)
+        </button>
+      ) : state.kind === "configured" || state.kind === "removing" ? (
+        <p className="text-xs text-muted-foreground">
+          A key is configured for this endpoint — stored by the agent host,
+          never shown back.
+        </p>
+      ) : (
+        <>
+          <div className="flex gap-2">
+            <Input
+              type="password"
+              placeholder="Optional — Ollama needs none"
+              value={value}
+              onChange={(e) => setValue(e.target.value)}
+              disabled={state.kind === "saving"}
+              autoComplete="off"
+              spellCheck={false}
+            />
+            <Button
+              size="default"
+              disabled={state.kind === "saving" || value.trim().length === 0}
+              onClick={() => void handleSaveKey()}
+            >
+              {state.kind === "saving" ? (
+                <Loader2 className="size-4 animate-spin" />
+              ) : null}
+              Save key
+            </Button>
+          </div>
+          <p className="text-xs text-muted-foreground">
+            For gateways that require authentication (a keyed LiteLLM or vLLM).
+            Sent as a bearer token on requests to this endpoint.
+          </p>
+        </>
+      )}
+    </div>
+  );
+}
+
+const compactTokens = new Intl.NumberFormat("en-US", { notation: "compact" });
+
+/**
+ * One registered model. Detection owns the capability fields: a value
+ * the endpoint reported renders as a read-only badge (no input over
+ * discoverable truth — a hand-typed snapshot only rots). Inputs appear
+ * ONLY where detection has nothing (manual adds, ids-only gateways);
+ * they write to `overrides`, the sticky human slot a probe refresh
+ * never touches.
+ */
+function LocalModelRow({
+  model,
+  onChange,
+  onRemove,
+}: {
+  model: EndpointModelEntry;
+  onChange: (next: EndpointModelEntry) => void;
+  onRemove: () => void;
+}) {
+  const resolved = resolveEndpointModel(model);
+  const ctxOverridden = model.overrides?.contextWindow !== undefined;
+  const toolsOverridden = model.overrides?.tool_call !== undefined;
+
+  return (
+    <div className="flex items-center gap-2 rounded-md border px-3 py-2">
+      <span className="flex-1 truncate font-mono text-xs">{model.id}</span>
+
+      {model.contextWindow !== undefined ? (
+        <span
+          className="shrink-0 rounded-md bg-secondary px-2 py-1 font-mono text-xs tabular-nums text-muted-foreground"
+          title={
+            ctxOverridden
+              ? "Context window (manual override from endpoints.json)"
+              : "Context window (detected from the endpoint)"
+          }
+        >
+          {/* non-null: this branch is gated on a detected contextWindow,
+              and resolution only ever overrides it, never unsets it */}
+          {compactTokens.format(resolved.contextWindow!)} ctx
+          {ctxOverridden ? " ·m" : ""}
+        </span>
+      ) : (
+        <Input
+          className="h-8 w-28 text-xs"
+          type="number"
+          min={1024}
+          step={1024}
+          value={model.overrides?.contextWindow ?? ""}
+          onChange={(e) => {
+            const value = e.target.valueAsNumber;
+            onChange({
+              ...model,
+              overrides: {
+                ...model.overrides,
+                contextWindow: Number.isFinite(value)
+                  ? Math.max(1, Math.floor(value))
+                  : undefined,
+              },
+            });
+          }}
+          placeholder="ctx (8192)"
+          aria-label="Context window (tokens)"
+        />
+      )}
+
+      {model.tool_call !== undefined ? (
+        <span
+          className="shrink-0 rounded-md bg-secondary px-2 py-1 text-xs text-muted-foreground"
+          title={
+            toolsOverridden
+              ? "Tool-calling (manual override from endpoints.json)"
+              : "Tool-calling (detected from the endpoint)"
+          }
+        >
+          {resolved.tool_call ? "tools" : "no tools"}
+        </span>
+      ) : (
+        <label className="flex items-center gap-1.5 text-xs text-muted-foreground">
+          <Switch
+            checked={resolved.tool_call ?? true}
+            onCheckedChange={(checked) =>
+              onChange({
+                ...model,
+                overrides: { ...model.overrides, tool_call: checked },
+              })
+            }
+            aria-label="Supports tool calls"
+          />
+          tools
+        </label>
+      )}
+
+      <Button
+        variant="ghost"
+        size="icon-sm"
+        aria-label={`Remove ${model.id}`}
+        onClick={onRemove}
+      >
+        <Trash2 className="size-3.5" />
+      </Button>
+    </div>
+  );
+}
+
 /* ────────────────────────────── About ────────────────────────────── */
 
 function AboutSection() {
diff --git a/editor/app/desktop/welcome/page.tsx b/editor/app/desktop/welcome/page.tsx
index f3f812599..6dcfcaddf 100644
--- a/editor/app/desktop/welcome/page.tsx
+++ b/editor/app/desktop/welcome/page.tsx
@@ -57,6 +57,7 @@ import {
   DesktopModelPicker,
   useModelPickerState,
 } from "@/scaffolds/desktop/shared/model-picker";
+import { useEndpointProviders } from "@/scaffolds/desktop/shared/registered-models";
 import { useWorkspaceComposerCatalog } from "@/scaffolds/desktop/shared/use-workspace-composer-catalog";
 import { workspaceWorkbenchHref } from "@/scaffolds/desktop/workbench/workspace-workbench-url";
 
@@ -110,6 +111,10 @@ export default function DesktopWelcomePage() {
   // empty id and yields an empty catalog.
   const catalog = useWorkspaceComposerCatalog(selectedId ?? "");
 
+  // Configured endpoint providers (issue #806): registered local models
+  // join the welcome composer's picker too.
+  const endpoints = useEndpointProviders();
+
   // Model selection for the composer. No sessions here (the welcome page
   // never loads a chat), so this just holds the user's pick at the
   // default; it rides the handoff so the workspace chat's first turn runs
@@ -117,6 +122,7 @@ export default function DesktopWelcomePage() {
   const { model_id: modelId, setModelId } = useModelPickerState({
     current_id: null,
     sessions: [],
+    endpoints,
   });
 
   const onOpen = useCallback(async () => {
@@ -271,6 +277,7 @@ export default function DesktopWelcomePage() {
                 <DesktopModelPicker
                   value={modelId}
                   onValueChange={setModelId}
+                  endpoints={endpoints}
                 />
               }
             />
diff --git a/editor/lib/agent-chat/approval-resume.test.ts b/editor/lib/agent-chat/approval-resume.test.ts
index 9b4e78959..7a9634aec 100644
--- a/editor/lib/agent-chat/approval-resume.test.ts
+++ b/editor/lib/agent-chat/approval-resume.test.ts
@@ -26,6 +26,21 @@ describe("buildApprovalResumeBody", () => {
     });
   });
 
+  it("pins the endpoint provider on resume — same rule as a normal send (#806)", () => {
+    // Resumes go back through /agent/run, so the builder has to carry the
+    // endpoint pin through exactly as a normal send does.
+    const { provider_id: pinned } = buildApprovalResumeBody({
+      session_id: "ses_1",
+      model_id: "llama3.1:8b",
+      provider_id: "ollama",
+      mode: "accept-edits",
+      tool_call_id: "tc1",
+      approval_id: "ap1",
+      approved: true,
+    });
+    expect(pinned).toBe("ollama");
+  });
+
   it("forwards a denial (approved: false) verbatim", () => {
     const body = buildApprovalResumeBody({
       mode: "accept-edits",
diff --git a/editor/lib/agent-chat/approval-resume.ts b/editor/lib/agent-chat/approval-resume.ts
index 65404c1ce..e3c7209b1 100644
--- a/editor/lib/agent-chat/approval-resume.ts
+++ b/editor/lib/agent-chat/approval-resume.ts
@@ -18,6 +18,13 @@ import type { AgentMode } from "@/lib/desktop/bridge";
 export type ApprovalResumeBody = {
   session_id?: string;
   model_id?: string;
+  /**
+   * Endpoint provider pin (issue #806) — same rule as a normal send: a
+   * resume re-enters `/agent/run`, so without the pin a registered local
+   * model id would cascade BYOK-first and land on a provider that cannot
+   * serve it. Omitted for catalog models.
+   */
+  provider_id?: string;
   mode: AgentMode;
   approval_answer: {
     tool_call_id: string;
@@ -29,6 +36,7 @@ export type ApprovalResumeBody = {
 export type ApprovalResumeArgs = {
   session_id?: string;
   model_id?: string;
+  provider_id?: string;
   mode: AgentMode;
   tool_call_id: string;
   approval_id: string;
@@ -41,6 +49,7 @@ export function buildApprovalResumeBody(
   return {
     session_id: args.session_id,
     model_id: args.model_id,
+    provider_id: args.provider_id,
     mode: args.mode,
     approval_answer: {
       tool_call_id: args.tool_call_id,
diff --git a/editor/lib/agent-chat/build-agent-send.test.ts b/editor/lib/agent-chat/build-agent-send.test.ts
index 0de7a24c5..b40b3cf74 100644
--- a/editor/lib/agent-chat/build-agent-send.test.ts
+++ b/editor/lib/agent-chat/build-agent-send.test.ts
@@ -91,3 +91,42 @@ describe("buildAgentSend", () => {
     ).toBe(false);
   });
 });
+
+describe("buildAgentSend — endpoint provider pin (#806)", () => {
+  it("rides provider_id when the picked model is a registered endpoint model", () => {
+    // Registered local model: the caller hands over the endpoint's id and
+    // the send body is expected to repeat it as `provider_id`.
+    const spy = vi.fn<SendMessageFn>();
+    const send = buildAgentSend({
+      sendMessage: spy,
+      sessionId: "s1",
+      modelId: "llama3.1:8b",
+      providerId: "ollama",
+    });
+    const expectedBody = {
+      session_id: "s1",
+      model_id: "llama3.1:8b",
+      provider_id: "ollama",
+    };
+
+    send("hi");
+
+    expect(spy).toHaveBeenCalledWith({ text: "hi" }, { body: expectedBody });
+  });
+
+  it("omits provider_id for catalog models (BYOK cascade stays in charge)", () => {
+    // No providerId is supplied here, so the body handed to sendMessage
+    // must carry no `provider_id` key.
+    const spy = vi.fn<SendMessageFn>();
+    const send = buildAgentSend({
+      sendMessage: spy,
+      sessionId: "s1",
+      modelId: "anthropic/claude-sonnet-4.6",
+    });
+
+    send("hi");
+
+    const [, options] = spy.mock.calls[0];
+    expect(options?.body).not.toHaveProperty("provider_id");
+  });
+});
diff --git a/editor/lib/agent-chat/build-agent-send.ts b/editor/lib/agent-chat/build-agent-send.ts
index 92c28b174..a3f556cff 100644
--- a/editor/lib/agent-chat/build-agent-send.ts
+++ b/editor/lib/agent-chat/build-agent-send.ts
@@ -16,6 +16,13 @@ import type { AgentMode } from "@grida/agent";
 export type AgentSendBody = {
   session_id?: string;
   model_id: string;
+  /**
+   * Explicit provider pick (issue #806). Set when the chosen model is a
+   * registered endpoint model — provider resolution otherwise cascades
+   * BYOK-first, and a stored OpenRouter key would swallow a local model
+   * id it cannot serve. Omitted for catalog models (cascade is correct).
+   */
+  provider_id?: string;
   /** Permission/supervision posture for the turn (RFC `permission modes`). */
   mode?: AgentMode;
   /** Per-send skill subset (workspace tab); omitted on tab-less surfaces. */
@@ -32,15 +39,18 @@ export function buildAgentSend(opts: {
   sendMessage: SendMessageFn;
   sessionId: string | null;
   modelId: string;
+  /** Endpoint provider id serving `modelId`, when it's a registered model. */
+  providerId?: string;
   mode?: AgentMode;
   skills?: string[];
 }): (text: string, files?: FileUIPart[]) => void {
-  const { sendMessage, sessionId, modelId, mode, skills } = opts;
+  const { sendMessage, sessionId, modelId, providerId, mode, skills } = opts;
   return (text, files) => {
     const body: AgentSendBody = {
       session_id: sessionId ?? undefined,
       model_id: modelId,
     };
+    if (providerId) body.provider_id = providerId;
     if (mode) body.mode = mode;
     if (skills) body.skills = skills;
     void sendMessage(files && files.length > 0 ? { text, files } : { text }, {
diff --git a/editor/lib/agent-chat/web-daemon-bridge.ts b/editor/lib/agent-chat/web-daemon-bridge.ts
index 7ede0dc6a..544c08d17 100644
--- a/editor/lib/agent-chat/web-daemon-bridge.ts
+++ b/editor/lib/agent-chat/web-daemon-bridge.ts
@@ -219,6 +219,13 @@ export function createWebDaemonBridge(
       set: (providerId, key) => client.secrets.set(providerId, key),
       delete: (providerId) => client.secrets.delete(providerId),
     },
+    providers: {
+      list_endpoints: () => client.providers.list_endpoints(),
+      set_endpoint: (config) => client.providers.set_endpoint(config),
+      delete_endpoint: (id) => client.providers.delete_endpoint(id),
+      info: () => client.providers.info(),
+      probe_endpoint: (baseUrl) => client.providers.probe_endpoint(baseUrl),
+    },
 
     agent: {
       run: (opts, onChunk) =>
diff --git a/editor/lib/desktop/bridge-boundary.test.ts b/editor/lib/desktop/bridge-boundary.test.ts
index fbcdb61a3..74e2e607d 100644
--- a/editor/lib/desktop/bridge-boundary.test.ts
+++ b/editor/lib/desktop/bridge-boundary.test.ts
@@ -66,6 +66,7 @@ describe("/desktop bridge boundary", () => {
       "/secrets/has",
       "/secrets/set",
       "/secrets/delete",
+      "/providers/endpoints/",
       "/sessions",
       "/workspaces",
       "/files/",
diff --git a/editor/lib/desktop/bridge.ts b/editor/lib/desktop/bridge.ts
index 9e7316421..0228c6be3 100644
--- a/editor/lib/desktop/bridge.ts
+++ b/editor/lib/desktop/bridge.ts
@@ -21,13 +21,23 @@ import {
   BYOK_PROVIDER_METADATA,
   AGENT_TIERS,
   AGENT_SESSION_AGENT,
+  OLLAMA_ENDPOINT_PRESET,
+  mergeProbedModels,
+  resolveEndpointModel,
+  resolveEndpointModels,
   type AgentMode,
   type AgentUIMessageChunk,
   type AgentRunOptions,
   type ByokProviderId,
+  type ProviderId,
   type ChatMessageWithParts,
   type ChatSessionRow,
   type CreateSessionOptions,
+  type EndpointModelEntry,
+  type EndpointModelOverrides,
+  type EndpointModelSpec,
+  type EndpointProviderConfig,
+  type ProbedEndpointModel,
   type PatchSessionOptions,
   type RewindResult,
   type SessionListFilter,
@@ -58,11 +68,21 @@ export {
   BYOK_PROVIDER_METADATA,
   AGENT_TIERS,
   AGENT_SESSION_AGENT,
+  OLLAMA_ENDPOINT_PRESET,
+  mergeProbedModels,
+  resolveEndpointModel,
+  resolveEndpointModels,
+  type EndpointModelEntry,
+  type EndpointModelOverrides,
+  type EndpointModelSpec,
+  type EndpointProviderConfig,
+  type ProbedEndpointModel,
   type AgentMode,
   type AgentUIMessageChunk,
   type AgentRunOptions,
   type ByokProviderId,
   type ByokProviderMetadata,
+  type ProviderId,
   type ChatMessageRow,
   type ChatMessageWithParts,
   type ChatModel,
@@ -252,7 +272,11 @@ export namespace secrets {
     return BYOK_PROVIDER_METADATA;
   }
 
-  export async function hasKey(providerId: ByokProviderId): Promise<boolean> {
+  // Provider ids here are BYOK ids OR configured endpoint ids (#806) —
+  // a keyed gateway stores its key under its endpoint id through these
+  // same helpers. The agent host validates membership; unknown ids 400.
+
+  export async function hasKey(providerId: ProviderId): Promise<boolean> {
     return await bridgeOrThrow().secrets.has(providerId);
   }
 
@@ -263,7 +287,7 @@ export namespace secrets {
    * round-trip.
    */
   export async function setKey(
-    providerId: ByokProviderId,
+    providerId: ProviderId,
     key: string
   ): Promise<void> {
     if (key.trim().length === 0) {
@@ -272,7 +296,7 @@ export namespace secrets {
     await bridgeOrThrow().secrets.set(providerId, key);
   }
 
-  export async function deleteKey(providerId: ByokProviderId): Promise<void> {
+  export async function deleteKey(providerId: ProviderId): Promise<void> {
     await bridgeOrThrow().secrets.delete(providerId);
   }
 
@@ -287,9 +311,14 @@ export namespace secrets {
    * matches platform convention for destructive prompts.
    */
   export async function confirmDeleteKey(
-    providerId: ByokProviderId
+    providerId: ProviderId,
+    /** Display name override — endpoint ids have no BYOK label. */
+    displayLabel?: string
   ): Promise<boolean> {
-    const label = BYOK_PROVIDER_LABELS[providerId];
+    const label =
+      displayLabel ??
+      BYOK_PROVIDER_LABELS[providerId as ByokProviderId] ??
+      providerId;
     const choice = await bridgeOrThrow().dialog.confirm({
       message: `Remove ${label} key?`,
       detail:
@@ -302,6 +331,95 @@ export namespace secrets {
   }
 }
 
+/* ─────────────────────── providers namespace ─────────────────── */
+
+/**
+ * Endpoint provider config (issue #806) — user-configured OpenAI-
+ * compatible endpoints (Ollama preset, self-hosted gateways). Plain
+ * readable config, unlike `secrets`: the renderer may list configs back.
+ * A keyed gateway stores its key via the `secrets` namespace under the
+ * endpoint's id; this namespace never carries credentials.
+ *
+ * The bridge field is OPTIONAL (older desktop binaries) — UI must gate
+ * on {@link providers.isSupported}.
+ */
+export namespace providers {
+  /** Whether the running bridge exposes the `providers` surface. */
+  export function isSupported(): boolean {
+    return getDesktopBridge()?.providers != null;
+  }
+
+  /** Configured endpoints; `[]` when the bridge lacks `providers`. */
+  export async function listEndpoints(): Promise<EndpointProviderConfig[]> {
+    const surface = bridgeOrThrow().providers;
+    return surface ? await surface.list_endpoints() : [];
+  }
+
+  /** Writes `config`; throws when the bridge lacks `providers`. */
+  export async function setEndpoint(
+    config: EndpointProviderConfig
+  ): Promise<void> {
+    const surface = bridgeOrThrow().providers;
+    if (!surface) throw new DesktopBridgeMissingError();
+    await surface.set_endpoint(config);
+  }
+
+  /** Deletes endpoint `id`; throws when the bridge lacks `providers`. */
+  export async function deleteEndpoint(id: string): Promise<void> {
+    const surface = bridgeOrThrow().providers;
+    if (!surface) throw new DesktopBridgeMissingError();
+    await surface.delete_endpoint(id);
+  }
+
+  /**
+   * Discover the models an endpoint serves. The fetch happens on the
+   * agent host — the renderer's origin cannot reach a local Ollama
+   * directly (CORS). Throws when the bridge predates the surface or the
+   * probe itself rejects; callers fall back to manual entry.
+   */
+  export async function probeEndpoint(baseUrl: string): Promise<{
+    source: "ollama" | "openai";
+    models: ProbedEndpointModel[];
+  }> {
+    const surface = bridgeOrThrow().providers;
+    if (!surface?.probe_endpoint) throw new DesktopBridgeMissingError();
+    return await surface.probe_endpoint(baseUrl);
+  }
+
+  /**
+   * Reveal `endpoints.json` (hand-editable; `overrides` live there) in the
+   * OS file manager. Resolves `false` when this host cannot reveal: same
+   * checks as {@link canRevealConfigFile}, on a single bridge lookup.
+   */
+  export async function revealConfigFile(): Promise<boolean> {
+    const bridge = getDesktopBridge();
+    const surface = bridge?.providers;
+    if (!bridge || !surface?.info || !bridge.caps.native.shell) return false;
+    const { path } = await surface.info();
+    await bridge.shell.show_item_in_folder(path);
+    return true;
+  }
+
+  /** Whether {@link revealConfigFile} can work in this host. */
+  export function canRevealConfigFile(): boolean {
+    const bridge = getDesktopBridge();
+    return Boolean(bridge?.providers?.info && bridge.caps.native.shell);
+  }
+
+  /** Native confirm for "Remove endpoint"; `true` only when Remove is picked. */
+  export async function confirmDeleteEndpoint(label: string): Promise<boolean> {
+    const choice = await bridgeOrThrow().dialog.confirm({
+      message: `Remove ${label}?`,
+      detail:
+        "The agent will stop using this endpoint and its registered models. You can add it back any time.",
+      buttons: ["Remove", "Cancel"],
+      default_id: 1,
+      cancel_id: 1,
+    });
+    return choice === 0;
+  }
+}
+
 /* ───────────────────────── app namespace ────────────────────── */
 
 export type DesktopAppInfo = {
diff --git a/editor/scaffolds/desktop/ai-sidebar/chat.tsx b/editor/scaffolds/desktop/ai-sidebar/chat.tsx
index eabb72ff3..4247629b1 100644
--- a/editor/scaffolds/desktop/ai-sidebar/chat.tsx
+++ b/editor/scaffolds/desktop/ai-sidebar/chat.tsx
@@ -31,7 +31,6 @@ import {
 } from "@app/ui/ai-elements/conversation";
 import { cn } from "@app/ui/lib/utils";
 import type { ComposerCatalog } from "@/kits/composer";
-import _models from "@grida/ai-models";
 import {
   AGENT_SESSION_AGENT,
   sessions as bridgeSessions,
@@ -61,9 +60,14 @@ import {
 import { ChatSessionPicker } from "../shared/chat-session-picker";
 import {
   DesktopModelPicker,
+  ModelToolCallNotice,
   useModelPickerState,
 } from "../shared/model-picker";
 import { DesktopContextMeter } from "../shared/context-meter";
+import {
+  registered_models,
+  useEndpointProviders,
+} from "../shared/registered-models";
 import {
   AgentComposerInput,
   type ComposerCommandAction,
@@ -227,18 +231,24 @@ export function AISidebarChat({ className }: { className?: string }) {
     setMessages(chatSession.initial_messages);
   }, [chatSession.initial_messages, setMessages]);
 
+  // Configured endpoint providers (issue #806): their registered models
+  // join the picker and the capability gates below.
+  const endpoints = useEndpointProviders();
+
   // Flat model selection (ignores tiers). Seeds from the active
   // session's stored model and rides each send as `body.modelId`.
   const { model_id: modelId, setModelId } = useModelPickerState({
     current_id: chatSession.current_id,
     sessions: chatSession.sessions,
+    endpoints,
   });
 
-  // Whether the active model accepts image input — memoized so the catalog
-  // lookup doesn't re-scan on every render (only when the model changes).
+  // Whether the active model accepts image input — memoized so the
+  // registry lookup doesn't re-scan on every render (only when the model
+  // or endpoint list changes).
   const multimodal = useMemo(
-    () => _models.text.modelSpecById(modelId)?.multimodal ?? false,
-    [modelId]
+    () => registered_models.resolve(modelId, endpoints)?.multimodal ?? false,
+    [modelId, endpoints]
   );
 
   // The active session row carries the rolled-up cost the context meter
@@ -272,6 +282,7 @@ export function AISidebarChat({ className }: { className?: string }) {
       sendMessage,
       sessionId: chatSession.current_id,
       modelId,
+      providerId: registered_models.providerIdForModel(modelId, endpoints),
     }),
   });
 
@@ -440,6 +451,8 @@ export function AISidebarChat({ className }: { className?: string }) {
 
       <QueuedMessages queued={queued} onCancel={cancelQueued} />
 
+      <ModelToolCallNotice model_id={modelId} endpoints={endpoints} />
+
       <div className="shrink-0 border-t p-3">
         <AgentComposerInput
           catalog={EMPTY_CATALOG}
@@ -452,11 +465,16 @@ export function AISidebarChat({ className }: { className?: string }) {
           multimodal={multimodal}
           toolbar={
             <>
-              <DesktopModelPicker value={modelId} onValueChange={setModelId} />
+              <DesktopModelPicker
+                value={modelId}
+                onValueChange={setModelId}
+                endpoints={endpoints}
+              />
               <DesktopContextMeter
                 messages={messages}
                 modelId={modelId}
                 costUsd={activeSession?.cost_usd}
+                endpoints={endpoints}
               />
             </>
           }
diff --git a/editor/scaffolds/desktop/shared/context-meter.tsx b/editor/scaffolds/desktop/shared/context-meter.tsx
index 727ef42c1..7758985eb 100644
--- a/editor/scaffolds/desktop/shared/context-meter.tsx
+++ b/editor/scaffolds/desktop/shared/context-meter.tsx
@@ -17,9 +17,8 @@
 
 import { useMemo } from "react";
 import type { UIMessage } from "ai";
-// `@grida/ai-models` is the framework-free catalog (renderer-safe, unlike
-// the `@/lib/ai/models` server seam) — same import the model picker uses.
-import _models from "@grida/ai-models";
+import type { EndpointProviderConfig } from "@/lib/desktop/bridge";
+import { registered_models } from "./registered-models";
 import { Button } from "@app/ui/components/button";
 import {
   Popover,
@@ -70,14 +69,23 @@ export function DesktopContextMeter({
   messages,
   modelId,
   costUsd,
+  endpoints = [],
 }: {
   messages: UIMessage[];
-  /** Active model id — its catalog spec supplies the context window. */
+  /** Active model id — its resolved spec supplies the context window. */
   modelId: string;
   /** Real session cost so far, in USD. Shown when > 0. */
   costUsd?: number;
+  /** Configured endpoint providers (issue #806) — registered local models
+   *  resolve their real (often small) windows through these. */
+  endpoints?: readonly EndpointProviderConfig[];
 }) {
-  const contextWindow = _models.text.modelSpecById(modelId)?.contextWindow;
+  // Memoized: chat panels re-render per streamed token, and resolve()
+  // rebuilds the flattened spec list each call.
+  const contextWindow = useMemo(
+    () => registered_models.resolve(modelId, endpoints)?.contextWindow,
+    [modelId, endpoints]
+  );
   const {
     usedTokens,
     maxTokens,
diff --git a/editor/scaffolds/desktop/shared/model-picker.tsx b/editor/scaffolds/desktop/shared/model-picker.tsx
index c0f87eebc..6ad33f926 100644
--- a/editor/scaffolds/desktop/shared/model-picker.tsx
+++ b/editor/scaffolds/desktop/shared/model-picker.tsx
@@ -1,5 +1,7 @@
 /**
- * Desktop model picker — flat list of every catalog model.
+ * Desktop model picker — flat list of every catalog model, plus any
+ * user-registered endpoint models (issue #806 — local Ollama, self-
+ * hosted gateways).
  *
  * The agent system is tier-based (4 tiers → 4 models), but the catalog
  * holds more models than the tiers map to, leaving some unreachable.
@@ -11,7 +13,8 @@
 
 "use client";
 
-import { useEffect, useRef, useState } from "react";
+import { useEffect, useMemo, useRef, useState } from "react";
+import { TriangleAlertIcon } from "lucide-react";
 import {
   PromptInputSelect,
   PromptInputSelectContent,
@@ -24,7 +27,11 @@ import {
 // providers (live keys) and is lint-blocked from the desktop renderer
 // (GRIDA-SEC-004). This package is pure data and renderer-safe.
 import _models, { TIER_MODEL_IDS } from "@grida/ai-models";
-import type { ChatSessionRow } from "@/lib/desktop/bridge";
+import type {
+  ChatSessionRow,
+  EndpointProviderConfig,
+} from "@/lib/desktop/bridge";
+import { registered_models } from "./registered-models";
 
 const catalog = _models.text.catalog;
 type CatalogId = _models.text.CatalogId;
@@ -41,9 +48,13 @@ function isCatalogId(id: string | undefined | null): id is CatalogId {
 export function DesktopModelPicker({
   value,
   onValueChange,
+  endpoints = [],
 }: {
   value: string;
   onValueChange: (modelId: string) => void;
+  /** Configured endpoint providers whose registered models join the list
+   *  (grouped under the endpoint's label). */
+  endpoints?: readonly EndpointProviderConfig[];
 }) {
   return (
     <PromptInputSelect value={value} onValueChange={onValueChange}>
@@ -60,11 +71,57 @@ export function DesktopModelPicker({
             {_models.text.displayLabel(m)}
           </PromptInputSelectItem>
         ))}
+        {endpoints.map((endpoint) =>
+          endpoint.models.map((m) => (
+            <PromptInputSelectItem
+              key={`${endpoint.id}/${m.id}`}
+              value={m.id}
+              className="text-xs"
+            >
+              {m.label ?? m.id}
+              <span className="text-muted-foreground">
+                {" "}
+                · {endpoint.label ?? endpoint.id}
+              </span>
+            </PromptInputSelectItem>
+          ))
+        )}
       </PromptInputSelectContent>
     </PromptInputSelect>
   );
 }
 
+/**
+ * Inline notice for a selected model that is marked `tool_call: false`
+ * (issue #806). The agent loop is tool-heavy (files, commands, todos) —
+ * gating is deliberately permissive (the run is not blocked), so the
+ * honest move is a visible expectation-setter, not a hard stop.
+ */
+export function ModelToolCallNotice(props: {
+  model_id: string;
+  endpoints: readonly EndpointProviderConfig[];
+}) {
+  const { model_id: modelId, endpoints } = props;
+  // Cache the lookup per (model, endpoint list): the host chat panel
+  // re-renders on every streamed token, and each resolve() call flattens
+  // the registered specs again.
+  const resolved = useMemo(
+    () => registered_models.resolve(modelId, endpoints),
+    [modelId, endpoints]
+  );
+  if (!resolved) return null; // unknown id: nothing to warn about
+  if (resolved.tool_call) return null; // tool-capable: stay silent
+  return (
+    <div className="flex items-start gap-2 border-t bg-amber-500/10 px-3 py-2 text-xs text-amber-700 dark:text-amber-400">
+      <TriangleAlertIcon className="mt-0.5 size-3.5 shrink-0" />
+      <span>
+        {resolved.label} is marked as not supporting tool calls — the agent&apos;s
+        file, command, and planning abilities may not work with it.
+      </span>
+    </div>
+  );
+}
+
 /**
  * Model selection state for a chat panel. Defaults to
  * {@link DEFAULT_MODEL_ID} (or `initial`, when a caller seeds one — e.g.
@@ -78,15 +135,26 @@ export function useModelPickerState({
   current_id: currentId,
   sessions,
   initial,
+  endpoints = [],
 }: {
   current_id: string | null;
   sessions: ChatSessionRow[];
   /** Initial selection, applied only on first mount. Falls back to
-   * {@link DEFAULT_MODEL_ID} when absent or not a known catalog id. */
+   * {@link DEFAULT_MODEL_ID} when absent or not a known model id. */
   initial?: string;
+  /** Configured endpoint providers — their registered model ids count as
+   *  known, so a session that ran on a local model re-seeds correctly. */
+  endpoints?: readonly EndpointProviderConfig[];
 }): { model_id: string; setModelId: (id: string) => void } {
+  const registeredIds = useMemo(
+    () => new Set(registered_models.specs(endpoints).map((m) => m.id)),
+    [endpoints]
+  );
+  const isKnownId = (id: string | undefined | null): id is string =>
+    isCatalogId(id) || (typeof id === "string" && registeredIds.has(id));
+
   const [modelId, setModelId] = useState<string>(
-    isCatalogId(initial) ? initial : DEFAULT_MODEL_ID
+    isKnownId(initial) ? initial : DEFAULT_MODEL_ID
   );
   // The session id we last seeded from. Re-seed only when the active id
   // changes — `undefined` means "never seeded" so the first run fires.
@@ -104,9 +172,18 @@ export function useModelPickerState({
     // committing, so we don't lock in the default and skip the real seed.
     if (!row) return;
     const stored = row.model?.model_id;
-    if (isCatalogId(stored)) setModelId(stored);
-    seededFor.current = currentId;
-  }, [currentId, sessions]);
+    if (isKnownId(stored)) {
+      setModelId(stored);
+      seededFor.current = currentId;
+      return;
+    }
+    // Stored id not (yet) known. Endpoints load async — when the session
+    // ran on a registered local model, leave the seed open so the
+    // `registeredIds` dep can complete it once the endpoint list lands.
+    // A session with NO stored model is seeded-done immediately.
+    if (!stored) seededFor.current = currentId;
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, [currentId, sessions, registeredIds]);
 
   return { model_id: modelId, setModelId };
 }
diff --git a/editor/scaffolds/desktop/shared/registered-models.ts b/editor/scaffolds/desktop/shared/registered-models.ts
new file mode 100644
index 000000000..cc8499b86
--- /dev/null
+++ b/editor/scaffolds/desktop/shared/registered-models.ts
@@ -0,0 +1,79 @@
+/**
+ * Registered (endpoint) models in the desktop renderer — issue #806.
+ *
+ * One fetch surface + pure resolution helpers shared by the model
+ * picker, the capability gates (multimodal / tool_call), and the context
+ * meter, so every consumer resolves a model id the same way: static
+ * catalog ∪ user-registered endpoint models via
+ * `models.text.registry.resolve`.
+ */
+
+"use client";
+
+import { useEffect, useState } from "react";
+import _models from "@grida/ai-models";
+import { resolveEndpointModels } from "@grida/agent";
+import {
+  providers,
+  type EndpointModelSpec,
+  type EndpointProviderConfig,
+} from "@/lib/desktop/bridge";
+
+export namespace registered_models {
+  /** Every registered model across `endpoints` as one flat list; each
+   *  config is run through `resolveEndpointModels` before merging. */
+  export function specs(
+    endpoints: readonly EndpointProviderConfig[]
+  ): EndpointModelSpec[] {
+    const perEndpoint = endpoints.map((config) => resolveEndpointModels(config));
+    return perEndpoint.flat();
+  }
+
+  /** Resolve `modelId` via the registry, passing the endpoint specs along. */
+  export function resolve(
+    modelId: string,
+    endpoints: readonly EndpointProviderConfig[]
+  ): _models.text.registry.ResolvedModelSpec | undefined {
+    const custom = specs(endpoints);
+    return _models.text.registry.resolve(modelId, custom);
+  }
+
+  /** Id of the first endpoint that registers `modelId`; `undefined` when
+   *  none does (e.g. catalog models). Callers send it as `provider_id` so
+   *  the BYOK-first cascade cannot swallow an explicit local-model pick. */
+  export function providerIdForModel(
+    modelId: string,
+    endpoints: readonly EndpointProviderConfig[]
+  ): string | undefined {
+    for (const { id, models } of endpoints) {
+      if (models.some((entry) => entry.id === modelId)) return id;
+    }
+    return undefined;
+  }
+}
+
+/**
+ * The configured endpoint providers, fetched once per mount. `[]` while
+ * loading, outside the desktop renderer, or on an old binary without the
+ * bridge surface — every consumer degrades to catalog-only behavior.
+ */
+export function useEndpointProviders(): EndpointProviderConfig[] {
+  const [configs, setConfigs] = useState<EndpointProviderConfig[]>([]);
+  useEffect(() => {
+    if (!providers.isSupported()) return;
+    let active = true; // cleared on cleanup so a late reply is dropped
+    const load = async (): Promise<void> => {
+      try {
+        const list = await providers.listEndpoints();
+        if (active) setConfigs(list);
+      } catch {
+        // Additive config: on failure stay catalog-only, never block chat.
+      }
+    };
+    void load();
+    return () => {
+      active = false;
+    };
+  }, []);
+  return configs;
+}
diff --git a/editor/scaffolds/desktop/workbench/agent-pane.tsx b/editor/scaffolds/desktop/workbench/agent-pane.tsx
index 2ed15cb72..fc48213f7 100644
--- a/editor/scaffolds/desktop/workbench/agent-pane.tsx
+++ b/editor/scaffolds/desktop/workbench/agent-pane.tsx
@@ -50,7 +50,6 @@ import {
   type WelcomeHandoff,
 } from "@/lib/desktop/welcome-handoff";
 import { useDesktopAgentFocusSession } from "@/lib/desktop/agent-focus-session";
-import _models from "@grida/ai-models";
 import {
   buildAgentSend,
   buildApprovalResumeBody,
@@ -76,10 +75,15 @@ import { QueuedMessages } from "../shared/queued-messages";
 import { ChatSessionPicker } from "../shared/chat-session-picker";
 import {
   DesktopModelPicker,
+  ModelToolCallNotice,
   useModelPickerState,
 } from "../shared/model-picker";
 import { DesktopModePicker, useModePickerState } from "../shared/mode-picker";
 import { DesktopContextMeter } from "../shared/context-meter";
+import {
+  registered_models,
+  useEndpointProviders,
+} from "../shared/registered-models";
 import {
   AgentComposerInput,
   type ComposerCommandAction,
@@ -345,6 +349,10 @@ function AgentPaneContent({
     setMessages(chatSession.initial_messages);
   }, [chatSession.initial_messages, setMessages]);
 
+  // Configured endpoint providers (issue #806): their registered models
+  // join the picker and the capability gates below.
+  const endpoints = useEndpointProviders();
+
   // Flat model selection (ignores tiers). Seeds from the welcome
   // composer's pick on a handed-off fresh session, otherwise from the
   // active session's stored model, and rides each send as `body.modelId`.
@@ -352,6 +360,7 @@ function AgentPaneContent({
     current_id: chatSession.current_id,
     sessions: chatSession.sessions,
     initial: handoff?.model_id,
+    endpoints,
   });
 
   // Permission/supervision posture (RFC `permission modes`). Seeds from the
@@ -361,11 +370,12 @@ function AgentPaneContent({
     sessions: chatSession.sessions,
   });
 
-  // Whether the active model accepts image input — memoized so the catalog
-  // lookup doesn't re-scan on every render (only when the model changes).
+  // Whether the active model accepts image input — memoized so the
+  // registry lookup doesn't re-scan on every render (only when the model
+  // or endpoint list changes).
   const multimodal = useMemo(
-    () => _models.text.modelSpecById(modelId)?.multimodal ?? false,
-    [modelId]
+    () => registered_models.resolve(modelId, endpoints)?.multimodal ?? false,
+    [modelId, endpoints]
   );
 
   // The active session row carries the rolled-up cost the context meter
@@ -395,6 +405,10 @@ function AgentPaneContent({
   // the optimistic mirror, shared with `ai-sidebar/chat.tsx`. Skills ride the
   // live `send` from the active tab; a core-drained turn uses the session's
   // discovered skills (no per-send subset — the renderer has no tab there).
+  // Endpoint provider pin for the active model (issue #806) — rides every
+  // run-entering body: normal sends AND approval resumes below.
+  const providerId = registered_models.providerIdForModel(modelId, endpoints);
+
   const {
     queued,
     cancel: cancelQueued,
@@ -408,6 +422,7 @@ function AgentPaneContent({
       sendMessage,
       sessionId: chatSession.current_id,
       modelId,
+      providerId,
       mode,
       skills: skillsForActiveTab(activeRelPath),
     }),
@@ -540,6 +555,7 @@ function AgentPaneContent({
         body: buildApprovalResumeBody({
           session_id: chatSession.current_id ?? undefined,
           model_id: modelId,
+          provider_id: providerId,
           mode,
           tool_call_id: pending.toolCallId,
           approval_id: pending.approvalId,
@@ -547,7 +563,7 @@ function AgentPaneContent({
         }),
       });
     },
-    [chat, chatSession.current_id, modelId, mode]
+    [chat, chatSession.current_id, modelId, providerId, mode]
   );
 
   // A pending supervised approval (the model called a mutating command in
@@ -617,6 +633,8 @@ function AgentPaneContent({
 
       <QueuedMessages queued={queued} onCancel={cancelQueued} />
 
+      <ModelToolCallNotice model_id={modelId} endpoints={endpoints} />
+
       {/* Hidden while the session is busy: clicking Allow/Deny starts the
           resume turn (busy → true), so the bar vanishes on click — instant
           feedback, no optimistic message mutation needed. */}
@@ -636,11 +654,16 @@ function AgentPaneContent({
           toolbar={
             <>
               <DesktopModePicker value={mode} onValueChange={setMode} />
-              <DesktopModelPicker value={modelId} onValueChange={setModelId} />
+              <DesktopModelPicker
+                value={modelId}
+                onValueChange={setModelId}
+                endpoints={endpoints}
+              />
               <DesktopContextMeter
                 messages={messages}
                 modelId={modelId}
                 costUsd={activeSession?.cost_usd}
+                endpoints={endpoints}
               />
             </>
           }
diff --git a/packages/grida-ai-agent/README.md b/packages/grida-ai-agent/README.md
index e5efedf96..ccfaba3f8 100644
--- a/packages/grida-ai-agent/README.md
+++ b/packages/grida-ai-agent/README.md
@@ -43,11 +43,14 @@ public subpath; workspace bindings use it in-process.
 The perimeter that keeps this package small. A feature request that
 crosses one of these is the wrong tool, not a missing feature.
 
-- **Not a general model-provider router.** V1 provider selection is
-  BYOK-only and isolated to the node-only `providers/` layer
-  (OpenRouter → AI Gateway). The agent + runtime core never import
+- **Not a general model-provider router.** Provider selection is
+  isolated to the node-only `providers/` layer: the BYOK key slots
+  (OpenRouter → AI Gateway) plus ONE generalized OpenAI-compatible
+  endpoint type (`{base_url, optional key, registered models}` — Ollama
+  is the preset; issue #806). The agent + runtime core never import
   selection; they receive a resolved `ModelFactory`. There is no
-  registry for arbitrary third-party providers.
+  registry for arbitrary third-party providers — new hosted providers are
+  new BYOK slots, not config.
 - **Not a hosted model gateway.** The package does not proxy model calls
   through grida.co, own OAuth sessions, or mint hosted provider tokens.
 - **Not a billing or entitlement engine.** The package forwards per-step
diff --git a/packages/grida-ai-agent/src/__public-api__.test.ts b/packages/grida-ai-agent/src/__public-api__.test.ts
index 74bd82854..7929d5f57 100644
--- a/packages/grida-ai-agent/src/__public-api__.test.ts
+++ b/packages/grida-ai-agent/src/__public-api__.test.ts
@@ -180,6 +180,34 @@ describe("@grida/agent public API", () => {
       expect(row.agent).toBe("grida");
     });
 
+    it("exposes the endpoint-provider contract (issue #806)", () => {
+      expect(root.OLLAMA_ENDPOINT_PRESET).toEqual({
+        id: "ollama",
+        label: "Ollama",
+        base_url: "http://localhost:11434/v1",
+      });
+      expect(typeof root.isValidEndpointProviderId).toBe("function");
+      expect(typeof root.validateEndpointProviderConfig).toBe("function");
+      expect(typeof root.mergeProbedModels).toBe("function");
+      expect(root.isByokProviderId("openrouter")).toBe(true);
+      expect(root.isByokProviderId("ollama")).toBe(false);
+      const config: root.EndpointProviderConfig = {
+        ...root.OLLAMA_ENDPOINT_PRESET,
+        models: [{ id: "llama3.1:8b", tool_call: true }],
+      };
+      const model: root.EndpointModelSpec = config.models[0];
+      expect(model.id).toBe("llama3.1:8b");
+      // The model-id and provider-id wire types are open: a registered
+      // local id type-checks (the runtime gate still validates it).
+      const localModel: AgentModelId = "llama3.1:8b";
+      const localRun: AgentRunOptions = {
+        messages: [],
+        provider_id: "ollama",
+        model_id: localModel,
+      };
+      expect(localRun.provider_id).toBe("ollama");
+    });
+
     it("does not expose internal runtime/provider/server modules from the root", () => {
       expect("AgentRuntime" in root).toBe(false);
       expect("StreamRegistry" in root).toBe(false);
diff --git a/packages/grida-ai-agent/src/http/routes/handshake.ts b/packages/grida-ai-agent/src/http/routes/handshake.ts
index 708288ef3..b6ef3f823 100644
--- a/packages/grida-ai-agent/src/http/routes/handshake.ts
+++ b/packages/grida-ai-agent/src/http/routes/handshake.ts
@@ -30,6 +30,7 @@ const SUPPORTS_TAGS: Record<keyof AgentServerCapabilities, string> = {
   agent: "agent@1",
   workspaces: "workspaces@1",
   sessions: "sessions@1",
+  providers: "providers@1",
   shell: "shell@1",
 };
 
diff --git a/packages/grida-ai-agent/src/http/routes/providers.ts b/packages/grida-ai-agent/src/http/routes/providers.ts
new file mode 100644
index 000000000..a0fd5519b
--- /dev/null
+++ b/packages/grida-ai-agent/src/http/routes/providers.ts
@@ -0,0 +1,126 @@
+/**
+ * GRIDA-SEC-004 — `/providers/endpoints/*` routes (issue #806).
+ *
+ * CRUD over the endpoint provider config store: user-configured
+ * OpenAI-compatible endpoints (Ollama preset, self-hosted gateways).
+ *
+ * Unlike `/secrets/*`, configs ARE readable back to the client — an
+ * endpoint config is plain config (base URL + registered models), not a
+ * credential. The optional API key for a keyed gateway still rides the
+ * `/secrets/*` surface under the endpoint's id and never appears here.
+ *
+ * Threat note (reviewed): `base_url` is user-controlled egress — once an
+ * endpoint is configured and picked, conversation content flows to it.
+ * That is the feature (same trust model as BYOK: the desktop user points
+ * their own agent at their own endpoint), and the writer is the same
+ * authenticated loopback client that could already set a BYOK key. The
+ * validator pins the shape (http(s) URL, bounded sizes) so a config
+ * write can't smuggle arbitrary blobs.
+ */
+
+import type { Hono } from "hono";
+import {
+  parseEndpointBaseUrl,
+  validateEndpointProviderConfig,
+  type EndpointProviderConfig,
+} from "../../protocol/endpoints";
+import type { EndpointProvidersStore } from "../../providers/endpoints";
+import { probeEndpointModels } from "../../providers/probe";
+import type { SecretsStore } from "../../secrets";
+import { body, v } from "../validate";
+
+export type ProvidersRoutesDeps = {
+  endpoints: EndpointProvidersStore;
+  /**
+   * When present, deleting an endpoint also deletes the key stored under
+   * its id. Without this, the key would be orphaned in auth.json: the
+   * `/secrets/*` allowlist only accepts CONFIGURED endpoint ids, so the
+   * leftover would be undeletable — and re-creating the same endpoint id
+   * later would silently reuse the stale credential.
+   */
+  secrets?: SecretsStore;
+  /** Probe override for tests. Defaults to {@link probeEndpointModels}. */
+  probe?: typeof probeEndpointModels;
+};
+
+/**
+ * Mount the `/providers/endpoints/*` routes on `app` (all POST):
+ *
+ * - `list`   — every stored endpoint config.
+ * - `info`   — `{ path }`, where the config file lives.
+ * - `set`    — validate `body.config` and persist it; 400 when the payload
+ *              or the store rejects it, 500 on any other store failure.
+ * - `delete` — remove an endpoint and, if it was configured, its key.
+ * - `probe`  — discover models behind a `base_url`; 400 on a malformed
+ *              URL, 502 when the probe reports failure.
+ *
+ * @param app  The Hono app to register on.
+ * @param deps Endpoint store, plus an optional secrets store and probe
+ *             override (see {@link ProvidersRoutesDeps}).
+ */
+export function registerProvidersRoutes(app: Hono, deps: ProvidersRoutesDeps) {
+  const { endpoints, secrets } = deps;
+  const probe = deps.probe ?? probeEndpointModels;
+
+  app.post("/providers/endpoints/list", async (c) => {
+    const list: EndpointProviderConfig[] = await endpoints.list();
+    return c.json(list);
+  });
+
+  // Where the config JSON lives — the settings UI links developers to
+  // the hand-editable file (the `overrides` escape hatch lives there).
+  // Absolute paths are an accepted part of this surface (cf. workspaces).
+  app.post("/providers/endpoints/info", (c) =>
+    c.json({ path: endpoints.filePath })
+  );
+
+  app.post("/providers/endpoints/set", async (c) => {
+    let raw: unknown;
+    try {
+      raw = await c.req.json();
+    } catch {
+      raw = undefined;
+    }
+    const config = (raw as { config?: unknown } | undefined)?.config;
+    const result = validateEndpointProviderConfig(config);
+    if (!result.ok) {
+      return c.json({ error: `config ${result.error}` }, 400);
+    }
+    try {
+      await endpoints.set(result.config);
+    } catch (err) {
+      const message = err instanceof Error ? err.message : String(err);
+      // The store's own rejections (re-validation, entry cap) are client
+      // errors; anything else is a persistence failure (disk full, no
+      // write permission) — the payload wasn't the problem.
+      if (message.startsWith("[agent-host-endpoints]")) {
+        return c.json({ error: message }, 400);
+      }
+      console.error(`[agent-host-providers] endpoint set failed: ${message}`);
+      return c.json({ error: "failed to persist endpoint config" }, 500);
+    }
+    console.log(
+      `[agent-host-providers] endpoint set id=${result.config.id} models=${result.config.models.length}`
+    );
+    return c.json({ ok: true });
+  });
+
+  app.post("/providers/endpoints/delete", async (c) => {
+    const r = await body(c, { id: v.string });
+    if (!r.ok) return r.res;
+    const { id } = r.data;
+    // The endpoint's key (if any) goes with it — see the deps doc — but
+    // ONLY when `id` names a configured endpoint. `id` is an unchecked
+    // string here: without this gate the route would delete whatever the
+    // secrets store holds under it (a BYOK slot's key, say), sidestepping
+    // the `/secrets/*` allowlist.
+    const configured = (await endpoints.list()).some((e) => e.id === id);
+    // Key first: if the key delete fails the config is untouched, so a
+    // retry passes the gate again and the key can't be orphaned. Both
+    // deletes are idempotent.
+    if (configured) await secrets?.delete(id);
+    await endpoints.delete(id);
+    console.log(`[agent-host-providers] endpoint delete id=${id}`);
+    return c.json({ ok: true });
+  });
+
+  // Model discovery (see providers/probe.ts for the threat note): the
+  // host fetches the endpoint's own model listing and returns the
+  // PARSED rows — never the raw body. Takes a base_url (not a stored
+  // id) so the settings flow can prefill before the config is saved.
+  app.post("/providers/endpoints/probe", async (c) => {
+    const r = await body(c, { base_url: v.string });
+    if (!r.ok) return r.res;
+    // Malformed input is the caller's fault (400); only a well-formed
+    // URL that doesn't answer is an upstream failure (502).
+    const parsed = parseEndpointBaseUrl(r.data.base_url);
+    if (!parsed.ok) {
+      return c.json({ error: parsed.error }, 400);
+    }
+    const result = await probe(parsed.base_url);
+    if (!result.ok) {
+      return c.json({ error: result.error }, 502);
+    }
+    console.log(
+      `[agent-host-providers] probe source=${result.source} models=${result.models.length}`
+    );
+    return c.json({ source: result.source, models: result.models });
+  });
+}
diff --git a/packages/grida-ai-agent/src/http/routes/secrets.ts b/packages/grida-ai-agent/src/http/routes/secrets.ts
index 9b23965d1..84cc774e2 100644
--- a/packages/grida-ai-agent/src/http/routes/secrets.ts
+++ b/packages/grida-ai-agent/src/http/routes/secrets.ts
@@ -11,8 +11,10 @@
  * not the key itself.
  *
  * Allowed provider ids — a closed set:
- *   - `openrouter`
- *   - `vercel`
+ *   - the BYOK ids (`openrouter`, `vercel`)
+ *   - ids of CONFIGURED endpoint providers (issue #806) — a self-hosted
+ *     gateway may need a key; Ollama doesn't, but its slot still accepts
+ *     one harmlessly.
  *
  * Any other id is rejected with a 400 so a typo doesn't silently create a
  * never-used auth.json entry.
@@ -26,28 +28,54 @@
 
 import type { Hono } from "hono";
 import { BYOK_PROVIDER_IDS } from "../../protocol/provider-ids";
+import {
+  isKnownProviderId,
+  type EndpointProvidersStore,
+} from "../../providers/endpoints";
 import type { SecretsStore } from "../../secrets";
 import { body, v } from "../validate";
 
 export type SecretsRoutesDeps = {
   store: SecretsStore;
+  /** When present, ids of configured endpoint providers are also allowed
+   *  (a keyed self-hosted gateway stores its key under its endpoint id). */
+  endpoints?: EndpointProvidersStore;
 };
 
 export function registerSecretsRoutes(app: Hono, deps: SecretsRoutesDeps) {
-  const { store } = deps;
+  const { store, endpoints } = deps;
+
+  const allowedProviderId = async (
+    id: string
+  ): Promise<{ ok: true } | { ok: false; res: Response }> => {
+    if (await isKnownProviderId(id, endpoints)) return { ok: true };
+    return {
+      ok: false,
+      res: Response.json(
+        {
+          error: `provider_id must be one of: ${BYOK_PROVIDER_IDS.join(", ")}, or a configured endpoint id`,
+        },
+        { status: 400 }
+      ),
+    };
+  };
 
   app.post("/secrets/has", async (c) => {
-    const r = await body(c, { provider_id: v.oneOf(BYOK_PROVIDER_IDS) });
+    const r = await body(c, { provider_id: v.string });
     if (!r.ok) return r.res;
+    const allowed = await allowedProviderId(r.data.provider_id);
+    if (!allowed.ok) return allowed.res;
     return c.json({ has: await store.has(r.data.provider_id) });
   });
 
   app.post("/secrets/set", async (c) => {
     const r = await body(c, {
-      provider_id: v.oneOf(BYOK_PROVIDER_IDS),
+      provider_id: v.string,
       key: v.stringAllowEmpty,
     });
     if (!r.ok) return r.res;
+    const allowed = await allowedProviderId(r.data.provider_id);
+    if (!allowed.ok) return allowed.res;
     if (r.data.key.trim().length === 0) {
       return c.json({ error: "key must not be empty or whitespace-only" }, 400);
     }
@@ -57,8 +85,10 @@ export function registerSecretsRoutes(app: Hono, deps: SecretsRoutesDeps) {
   });
 
   app.post("/secrets/delete", async (c) => {
-    const r = await body(c, { provider_id: v.oneOf(BYOK_PROVIDER_IDS) });
+    const r = await body(c, { provider_id: v.string });
     if (!r.ok) return r.res;
+    const allowed = await allowedProviderId(r.data.provider_id);
+    if (!allowed.ok) return allowed.res;
     await store.delete(r.data.provider_id);
     console.log(`[agent-host-secrets] delete providerId=${r.data.provider_id}`);
     return c.json({ ok: true });
diff --git a/packages/grida-ai-agent/src/http/server.ts b/packages/grida-ai-agent/src/http/server.ts
index d735d9ca2..413352665 100644
--- a/packages/grida-ai-agent/src/http/server.ts
+++ b/packages/grida-ai-agent/src/http/server.ts
@@ -12,6 +12,7 @@ import {
 import { registerFilesRoutes } from "./routes/files";
 import { registerRecentRoutes } from "./routes/recent";
 import { registerSecretsRoutes } from "./routes/secrets";
+import { registerProvidersRoutes } from "./routes/providers";
 import { registerAgentRoutes } from "./routes/agent";
 import { registerWorkspacesRoutes } from "./routes/workspaces";
 import { registerSessionsRoutes } from "./routes/sessions";
@@ -19,6 +20,7 @@ import { FileRegistry } from "../files/registry";
 import { RecentStore } from "../files/recent";
 import { AuthStore } from "../auth/file";
 import { SecretsStore } from "../secrets";
+import { EndpointProvidersStore } from "../providers/endpoints";
 import { WorkspaceRegistry } from "../workspaces";
 import { openSessionsDb } from "../session/db";
 import { SessionsStore } from "../session/store";
@@ -118,6 +120,9 @@ export function buildServer(opts: ServerOptions): BuiltServer {
   const workspaceRegistry = new WorkspaceRegistry(opts.user_data_path);
   const authStore = new AuthStore(opts.user_data_path);
   const secretsStore = new SecretsStore(authStore);
+  // Endpoint provider configs (issue #806): plain config beside the
+  // secrets store, persisted at ${userData}/endpoints.json.
+  const endpointsStore = new EndpointProvidersStore(opts.user_data_path);
   // Chat sessions: SQLite at ${userData}/sessions.db. Opened once per
   // agent-host launch and closed via the returned cleanup. WAL mode in
   // sessions/db.ts lets a CLI inspector read concurrently.
@@ -135,6 +140,13 @@ export function buildServer(opts: ServerOptions): BuiltServer {
   if (opts.capabilities.secrets) {
     registerSecretsRoutes(app, {
       store: secretsStore,
+      endpoints: endpointsStore,
+    });
+  }
+  if (opts.capabilities.providers) {
+    registerProvidersRoutes(app, {
+      endpoints: endpointsStore,
+      secrets: secretsStore,
     });
   }
   // Agent runtime owns the run loop + the in-flight stream registry.
@@ -156,6 +168,7 @@ export function buildServer(opts: ServerOptions): BuiltServer {
   }
   const runtime = new AgentRuntime({
     secrets: secretsStore,
+    endpoints: endpointsStore,
     workspace_registry: workspaceRegistry,
     sessions_store: sessionsStore,
     streams: opts.stream_registry,
diff --git a/packages/grida-ai-agent/src/index.ts b/packages/grida-ai-agent/src/index.ts
index 3ec428a91..e115c113f 100644
--- a/packages/grida-ai-agent/src/index.ts
+++ b/packages/grida-ai-agent/src/index.ts
@@ -5,9 +5,25 @@
 export {
   BYOK_PROVIDER_METADATA,
   BYOK_PROVIDER_IDS,
+  isByokProviderId,
   type ByokProviderMetadata,
   type ByokProviderId,
+  type ProviderId,
 } from "./protocol/provider-ids";
+export {
+  OLLAMA_ENDPOINT_PRESET,
+  isValidEndpointProviderId,
+  mergeProbedModels,
+  resolveEndpointModel,
+  resolveEndpointModels,
+  validateEndpointProviderConfig,
+  type EndpointModelEntry,
+  type EndpointModelOverrides,
+  type EndpointModelSpec,
+  type EndpointProviderConfig,
+  type ProbedEndpointModel,
+  type ProbeMergeResult,
+} from "./protocol/endpoints";
 export {
   AGENT_SERVER_PROTOCOL,
   AGENT_SERVER_DEFAULT_CAPABILITIES,
diff --git a/packages/grida-ai-agent/src/neutral-globals.d.ts b/packages/grida-ai-agent/src/neutral-globals.d.ts
index 5511d4a05..3399ee702 100644
--- a/packages/grida-ai-agent/src/neutral-globals.d.ts
+++ b/packages/grida-ai-agent/src/neutral-globals.d.ts
@@ -6,3 +6,10 @@ declare const console: {
 
 declare function setTimeout(handler: () => void, timeout?: number): unknown;
 declare function clearTimeout(handle: unknown): void;
+
+/** WHATWG URL — universal (browsers + Node). Only the members the neutral
+ *  surface touches (endpoint base_url validation). */
+declare class URL {
+  constructor(url: string, base?: string | URL);
+  protocol: string;
+}
diff --git a/packages/grida-ai-agent/src/protocol/endpoints.ts b/packages/grida-ai-agent/src/protocol/endpoints.ts
new file mode 100644
index 000000000..6d45a34c7
--- /dev/null
+++ b/packages/grida-ai-agent/src/protocol/endpoints.ts
@@ -0,0 +1,427 @@
+/**
+ * Custom OpenAI-compatible endpoint providers (issue #806 — local LLMs).
+ *
+ * Client-safe identity + config contract for user-configured endpoints.
+ * Local **Ollama** is the flagship preset; any OpenAI-compatible gateway
+ * (LiteLLM, vLLM, an Azure-compatible proxy, …) fits the same shape. This
+ * is the package's ONE generalized endpoint-provider type — presets
+ * instantiate it; we deliberately do not grow an opencode-style
+ * config-declared provider registry (anti-goal: not a general
+ * model-provider router).
+ *
+ * An endpoint config is **plain config, not a secret**: a base URL plus
+ * the models the user registered for it. When a gateway needs an API key,
+ * the key lives in the `SecretsStore` under the endpoint's id (same
+ * presence/set/delete-only discipline as BYOK keys, GRIDA-SEC-003/004) —
+ * never inside this config, so the config can ride readable storage,
+ * routes, and the renderer bridge.
+ */
+
+import type { models } from "@grida/ai-models";
+import { isByokProviderId } from "./provider-ids";
+
+/** A model spec consumable by the open registry — `@grida/ai-models`'
+ *  custom spec (cost optional, capability flags explicit). This is the
+ *  RESOLVED shape; the stored shape is {@link EndpointModelEntry}. */
+export type EndpointModelSpec = models.text.registry.CustomModelSpec;
+
+/**
+ * Sticky human corrections for a model entry. Detection refresh NEVER
+ * writes these — they exist for the "the endpoint reports a wrong value"
+ * case and are set by hand-editing `endpoints.json` (or by the settings
+ * inputs shown when detection has nothing). Resolution order:
+ * override → detected → registry default.
+ */
+export type EndpointModelOverrides = Pick<
+  EndpointModelSpec,
+  "contextWindow" | "tool_call" | "multimodal"
+>;
+
+/**
+ * A model as STORED on an endpoint config. The top-level capability
+ * fields (`tool_call`, `contextWindow`, `multimodal`) are
+ * detection-owned: probe refresh overwrites them freely. Human
+ * corrections live in {@link EndpointModelOverrides} so a refresh can
+ * never clobber them. Resolve with {@link resolveEndpointModel} before
+ * feeding the registry.
+ */
+export type EndpointModelEntry = EndpointModelSpec & {
+  overrides?: EndpointModelOverrides;
+};
+
+/**
+ * A user-configured OpenAI-compatible endpoint provider.
+ *
+ * Resolvable (usable for a run) only when `models` is non-empty — an
+ * endpoint saved with just a base URL is valid config but not a provider
+ * the resolver will pick.
+ */
+export type EndpointProviderConfig = {
+  /** Stable id (`ollama`, `litellm`, …). See {@link ENDPOINT_PROVIDER_ID_PATTERN}. */
+  id: string;
+  /** Display label. Falls back to the id. */
+  label?: string;
+  /** OpenAI-compatible base URL, e.g. `http://localhost:11434/v1`. */
+  base_url: string;
+  /** Models this endpoint serves. */
+  models: EndpointModelEntry[];
+  /**
+   * The model every tier resolves to when a run doesn't pick an explicit
+   * model (the agent's tier→catalog map is meaningless to a local
+   * endpoint — background subagents like the titler/compactor must land
+   * on a model this endpoint actually serves). Defaults to `models[0]`.
+   */
+  default_model_id?: string;
+};
+
+/** Collapse a stored entry into the spec the registry consumes. For each
+ *  of `contextWindow`, `tool_call` and `multimodal`, an override that is
+ *  set wins over the detected value; every other field passes through and
+ *  `overrides` itself is stripped from the result. */
+export function resolveEndpointModel(
+  entry: EndpointModelEntry
+): EndpointModelSpec {
+  const { overrides: sticky, ...spec } = entry;
+  const contextWindow = sticky?.contextWindow ?? spec.contextWindow;
+  const tool_call = sticky?.tool_call ?? spec.tool_call;
+  const multimodal = sticky?.multimodal ?? spec.multimodal;
+  return { ...spec, contextWindow, tool_call, multimodal };
+}
+
+/** Override-resolve every model on `config`, in stored order — the custom
+ *  half of the model-registry seam. */
+export function resolveEndpointModels(
+  config: EndpointProviderConfig
+): EndpointModelSpec[] {
+  const { models } = config;
+  return models.map((entry) => resolveEndpointModel(entry));
+}
+
+/**
+ * Which model a run without a `model_id` executes on this endpoint: the
+ * explicit `default_model_id` when one is set, otherwise the first
+ * registered model. This is THE single source of the default-model rule —
+ * the provider factory and the runtime's limits resolution must agree on
+ * it, or compaction limits get computed for a different model than the
+ * one that actually runs.
+ *
+ * @returns The model id, or `undefined` when no default is set and the
+ *   endpoint has no models (i.e. it is not resolvable).
+ */
+export function endpointDefaultModelId(
+  config: EndpointProviderConfig
+): string | undefined {
+  const { default_model_id: explicit, models } = config;
+  if (explicit != null) return explicit;
+  return models[0]?.id;
+}
+
+/**
+ * The Ollama preset — the "no signup, no key" path. `ollama serve`
+ * exposes an OpenAI-compatible API at this base URL; no API key exists
+ * or is sent.
+ */
+export const OLLAMA_ENDPOINT_PRESET = {
+  id: "ollama",
+  label: "Ollama",
+  base_url: "http://localhost:11434/v1",
+} as const;
+
+/**
+ * A model discovered by probing an endpoint (issue #806 — `POST
+ * /providers/endpoints/probe`). Carries only what the endpoint actually
+ * REPORTS: Ollama's `/api/tags` exposes ids + capability tags,
+ * `/api/ps` / `/api/show` expose the context window; a generic
+ * OpenAI-compatible `/models` exposes ids only.
+ */
+export type ProbedEndpointModel = {
+  id: string;
+  /** Whether the endpoint reports native tool-calling support. Absent
+   *  when the endpoint doesn't expose capabilities. */
+  tool_call?: boolean;
+  /**
+   * Context window in tokens, when the endpoint reports one. For a
+   * LOADED Ollama model this is the server's actual allocation
+   * (`/api/ps` `context_length`); otherwise the model's maximum
+   * (`/api/show` `model_info`). Absent when neither reports.
+   */
+  contextWindow?: number;
+};
+
+export type ProbeMergeResult = {
+  models: EndpointModelEntry[];
+  /** Count of models the probe found that the config didn't know
+   *  (appended at the end, detection fields prefilled). */
+  discovered: number;
+  /** Count of existing entries whose detected fields changed. */
+  updated: number;
+};
+
+/**
+ * Apply a probe result onto an endpoint's stored models — the executable
+ * form of the detection-owned contract on {@link EndpointModelEntry}:
+ * probed values overwrite the top-level detected fields (a silent probe —
+ * e.g. an ids-only gateway — keeps the previous detection), `overrides`
+ * are NEVER written, and models the probe discovered are appended.
+ * Pure; shared by every surface that refreshes detection.
+ *
+ * A probe that reports the same id more than once is collapsed to one
+ * row per id (the last report wins), so discovered rows never repeat an
+ * id and `discovered` counts distinct models.
+ *
+ * @param models Stored entries; returned in the same order, refreshed.
+ * @param probed Rows reported by the endpoint.
+ * @returns The merged list plus counts of discovered and updated models.
+ */
+export function mergeProbedModels(
+  models: readonly EndpointModelEntry[],
+  probed: readonly ProbedEndpointModel[]
+): ProbeMergeResult {
+  // One row per id. A Map keeps the first occurrence's position and the
+  // last occurrence's value.
+  const probedById = new Map(probed.map((m) => [m.id, m]));
+  let updated = 0;
+  const refreshed = models.map((m): EndpointModelEntry => {
+    const p = probedById.get(m.id);
+    if (!p) return m;
+    const next: EndpointModelEntry = {
+      ...m,
+      // `??` so a field the probe left out keeps its previous detection.
+      tool_call: p.tool_call ?? m.tool_call,
+      contextWindow: p.contextWindow ?? m.contextWindow,
+    };
+    if (
+      next.contextWindow !== m.contextWindow ||
+      next.tool_call !== m.tool_call
+    ) {
+      updated += 1;
+    }
+    return next;
+  });
+  const known = new Set(models.map((m) => m.id));
+  // Walk the de-duplicated map rather than `probed` itself: filtering the
+  // raw list would append a repeated id twice, and the config validator
+  // rejects duplicate model ids.
+  const discovered = Array.from(probedById.values())
+    .filter((m) => !known.has(m.id))
+    .map(
+      (m): EndpointModelEntry => ({
+        id: m.id,
+        tool_call: m.tool_call,
+        contextWindow: m.contextWindow,
+      })
+    );
+  return {
+    models: [...refreshed, ...discovered],
+    discovered: discovered.length,
+    updated,
+  };
+}
+
+/**
+ * Endpoint ids: short lowercase slugs. Must not collide with the BYOK
+ * provider ids — both share the provider-id namespace on sessions,
+ * run options, and the secrets store.
+ */
+const ENDPOINT_PROVIDER_ID_PATTERN = /^[a-z][a-z0-9_-]{0,31}$/;
+
+/** True when `id` matches the endpoint slug pattern and is not one of the
+ *  BYOK provider ids. */
+export function isValidEndpointProviderId(id: string): boolean {
+  if (isByokProviderId(id)) return false;
+  return ENDPOINT_PROVIDER_ID_PATTERN.test(id);
+}
+
+/**
+ * Narrow + pin an endpoint base URL: http(s) only. Shared by the config
+ * validator and the probe so the two boundaries can't drift.
+ *
+ * The returned `base_url` is the TRIMMED input string (whitespace padding
+ * would survive `new URL` parsing yet break the string-concatenated
+ * request base later) — never `url.href`, so there are no other
+ * normalization surprises.
+ *
+ * @param raw Untrusted value; anything but a string is rejected.
+ * @returns `{ ok: true, base_url, url }` on success, otherwise
+ *   `{ ok: false, error }` with a message naming the rule that failed.
+ */
+export function parseEndpointBaseUrl(
+  raw: unknown
+): { ok: true; base_url: string; url: URL } | { ok: false; error: string } {
+  if (typeof raw !== "string") {
+    return { ok: false, error: "base_url must be a string" };
+  }
+  // The bound applies to the raw (untrimmed) input. An over-long string
+  // IS a string, so it gets its own message instead of the type error.
+  if (raw.length > MAX_BASE_URL_LEN) {
+    return {
+      ok: false,
+      error: `base_url must be ≤ ${MAX_BASE_URL_LEN} characters`,
+    };
+  }
+  const trimmed = raw.trim();
+  if (trimmed.length === 0) {
+    return { ok: false, error: "base_url must be a valid URL" };
+  }
+  let url: URL;
+  try {
+    url = new URL(trimmed);
+  } catch {
+    return { ok: false, error: "base_url must be a valid URL" };
+  }
+  if (url.protocol !== "http:" && url.protocol !== "https:") {
+    return { ok: false, error: "base_url must be http(s)" };
+  }
+  return { ok: true, base_url: trimmed, url };
+}
+
+/** Bounds that keep a config a config (not an unbounded blob). */
+const MAX_MODELS = 64;
+const MAX_MODEL_ID_LEN = 128;
+const MAX_LABEL_LEN = 64;
+const MAX_BASE_URL_LEN = 2048;
+const MAX_TOKEN_LIMIT = 100_000_000;
+
+export type EndpointConfigValidation =
+  | { ok: true; config: EndpointProviderConfig }
+  | { ok: false; error: string };
+
+/**
+ * Narrow an untrusted value to an {@link EndpointProviderConfig}.
+ *
+ * Shared by the store (load-time hygiene) and the HTTP route (write-time
+ * 400s), so a config that persisted always re-validates. The result is a
+ * fresh object built from known fields only — unknown keys are dropped,
+ * never round-tripped.
+ *
+ * Checks run in a fixed order (id, label, base_url, models, then
+ * default_model_id) and the FIRST failure is the one reported.
+ *
+ * @param raw Untrusted input, typically parsed JSON.
+ * @returns `{ ok: true, config }`, or `{ ok: false, error }` naming the
+ *   first rule that failed.
+ */
+export function validateEndpointProviderConfig(
+  raw: unknown
+): EndpointConfigValidation {
+  const reject = (error: string): EndpointConfigValidation => ({
+    ok: false,
+    error,
+  });
+
+  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
+    return reject("config must be an object");
+  }
+  const input = raw as Record<string, unknown>;
+
+  const { id } = input;
+  if (typeof id !== "string" || !isValidEndpointProviderId(id)) {
+    return reject(
+      "id must be a short lowercase slug and must not collide with a BYOK provider id"
+    );
+  }
+
+  // Absent is fine; present must be a string within the length bound.
+  const { label } = input;
+  const labelIsValid =
+    label === undefined ||
+    (typeof label === "string" && label.length <= MAX_LABEL_LEN);
+  if (!labelIsValid) {
+    return reject(`label must be a string ≤ ${MAX_LABEL_LEN}`);
+  }
+
+  const parsedUrl = parseEndpointBaseUrl(input.base_url);
+  if (!parsedUrl.ok) return parsedUrl;
+
+  const { models } = input;
+  if (!Array.isArray(models) || models.length > MAX_MODELS) {
+    return reject(`models must be an array of ≤ ${MAX_MODELS}`);
+  }
+  const entries: EndpointModelEntry[] = [];
+  const ids = new Set<string>();
+  for (const candidate of models) {
+    const checked = validateModelEntry(candidate);
+    if (!checked.ok) return checked;
+    const modelId = checked.entry.id;
+    if (ids.has(modelId)) {
+      return reject(`duplicate model id: ${modelId}`);
+    }
+    ids.add(modelId);
+    entries.push(checked.entry);
+  }
+
+  // An explicit default must name one of the models collected above.
+  let defaultModelId: string | undefined;
+  const requestedDefault = input.default_model_id;
+  if (requestedDefault !== undefined) {
+    if (typeof requestedDefault !== "string" || !ids.has(requestedDefault)) {
+      return reject("default_model_id must name one of the registered models");
+    }
+    defaultModelId = requestedDefault;
+  }
+
+  return {
+    ok: true,
+    config: {
+      id,
+      // An empty label is stored as "no label".
+      label: typeof label === "string" && label ? label : undefined,
+      base_url: parsedUrl.base_url,
+      models: entries,
+      default_model_id: defaultModelId,
+    },
+  };
+}
+
+type ModelEntryValidation =
+  | { ok: true; entry: EndpointModelEntry }
+  | { ok: false; error: string };
+
+/**
+ * Narrow one untrusted `models[]` element to an {@link EndpointModelEntry}.
+ *
+ * Checks run in order — shape, `id`, `label`, the entry's own capability
+ * fields, then `overrides` — and the first failure is returned. Only known
+ * fields are copied into the result. An `overrides` object with none of
+ * its three fields set is dropped rather than stored empty.
+ */
+function validateModelEntry(raw: unknown): ModelEntryValidation {
+  const reject = (error: string): ModelEntryValidation => ({
+    ok: false,
+    error,
+  });
+
+  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
+    return reject("model must be an object");
+  }
+  const input = raw as Record<string, unknown>;
+
+  const { id, label } = input;
+  if (
+    typeof id !== "string" ||
+    id.length === 0 ||
+    id.length > MAX_MODEL_ID_LEN
+  ) {
+    return reject(`model id must be a non-empty string ≤ ${MAX_MODEL_ID_LEN}`);
+  }
+  const labelIsValid =
+    label === undefined ||
+    (typeof label === "string" && label.length <= MAX_LABEL_LEN);
+  if (!labelIsValid) {
+    return reject(`model label must be a string ≤ ${MAX_LABEL_LEN}`);
+  }
+
+  const ownFields = validateCapabilityFields(input, "model");
+  if (!ownFields.ok) return ownFields;
+
+  let overrides: EndpointModelOverrides | undefined;
+  const rawOverrides = input.overrides;
+  if (rawOverrides !== undefined) {
+    if (
+      rawOverrides === null ||
+      typeof rawOverrides !== "object" ||
+      Array.isArray(rawOverrides)
+    ) {
+      return reject("model overrides must be an object");
+    }
+    const sticky = rawOverrides as Record<string, unknown>;
+    // Overrides carry only the detection-owned fields — no `outputLimit`.
+    const stickyFields = validateCapabilityFields(sticky, "model overrides", [
+      "contextWindow",
+    ]);
+    if (!stickyFields.ok) return stickyFields;
+    // Types were checked just above, so the casts only narrow `unknown`.
+    const candidate: EndpointModelOverrides = {
+      multimodal: sticky.multimodal as boolean | undefined,
+      tool_call: sticky.tool_call as boolean | undefined,
+      contextWindow: sticky.contextWindow as number | undefined,
+    };
+    const hasAny = Object.values(candidate).some((v) => v !== undefined);
+    overrides = hasAny ? candidate : undefined;
+  }
+
+  return {
+    ok: true,
+    entry: {
+      id,
+      label: typeof label === "string" && label ? label : undefined,
+      multimodal: input.multimodal as boolean | undefined,
+      tool_call: input.tool_call as boolean | undefined,
+      contextWindow: input.contextWindow as number | undefined,
+      outputLimit: input.outputLimit as number | undefined,
+      overrides,
+      // cost is intentionally not accepted from config input: a local/
+      // self-hosted model is unmetered on this rail, and a user-supplied
+      // price card would feed cost UI with invented numbers.
+    },
+  };
+}
+
+/**
+ * Type-check the capability fields on a model entry or on its overrides.
+ * Absent fields pass. Present `multimodal` / `tool_call` must be booleans;
+ * present token limits must be positive integers no larger than
+ * `MAX_TOKEN_LIMIT`.
+ *
+ * @param source Untrusted object whose fields are checked.
+ * @param scope  Prefix for error messages (e.g. `"model"`).
+ * @param limits Which token-limit fields to check; defaults to both.
+ * @returns `{ ok: true }`, or `{ ok: false, error }` for the first
+ *   offending field.
+ */
+function validateCapabilityFields(
+  source: Record<string, unknown>,
+  scope: string,
+  limits: readonly ("contextWindow" | "outputLimit")[] = [
+    "contextWindow",
+    "outputLimit",
+  ]
+): { ok: true } | { ok: false; error: string } {
+  for (const flag of ["multimodal", "tool_call"] as const) {
+    if (source[flag] !== undefined && typeof source[flag] !== "boolean") {
+      return { ok: false, error: `${scope} ${flag} must be a boolean` };
+    }
+  }
+  for (const limit of limits) {
+    const value = source[limit];
+    if (value === undefined) continue;
+    if (typeof value !== "number" || !Number.isInteger(value) || value <= 0) {
+      return {
+        ok: false,
+        error: `${scope} ${limit} must be a positive integer`,
+      };
+    }
+    // A value over the cap IS a positive integer, so name the bound
+    // instead of repeating the message above.
+    if (value > MAX_TOKEN_LIMIT) {
+      return {
+        ok: false,
+        error: `${scope} ${limit} must be ≤ ${MAX_TOKEN_LIMIT}`,
+      };
+    }
+  }
+  return { ok: true };
+}
diff --git a/packages/grida-ai-agent/src/protocol/handshake.ts b/packages/grida-ai-agent/src/protocol/handshake.ts
index 3ff73a3d5..b013231fc 100644
--- a/packages/grida-ai-agent/src/protocol/handshake.ts
+++ b/packages/grida-ai-agent/src/protocol/handshake.ts
@@ -11,6 +11,12 @@ export type AgentServerCapabilities = {
   agent: boolean;
   workspaces: boolean;
   sessions: boolean;
+  /**
+   * `/providers/endpoints/*` — endpoint provider config CRUD (issue
+   * #806). Optional so older host-supplied capability shapes stay valid;
+   * clients treat a missing flag as "not served".
+   */
+  providers?: boolean;
   /** Reserved for future `/shell/*` route group; always `false` in V1. */
   shell: boolean;
 };
@@ -24,6 +30,7 @@ export const AGENT_SERVER_DEFAULT_CAPABILITIES: AgentServerCapabilities = {
   agent: true,
   workspaces: true,
   sessions: true,
+  providers: true,
   shell: false,
 };
 
diff --git a/packages/grida-ai-agent/src/protocol/provider-ids.ts b/packages/grida-ai-agent/src/protocol/provider-ids.ts
index bfa3b89da..78efd49e5 100644
--- a/packages/grida-ai-agent/src/protocol/provider-ids.ts
+++ b/packages/grida-ai-agent/src/protocol/provider-ids.ts
@@ -23,3 +23,15 @@ export type ByokProviderId = ByokProviderMetadata["id"];
 export const BYOK_PROVIDER_IDS = BYOK_PROVIDER_METADATA.map(
   (provider) => provider.id
 ) as readonly ByokProviderId[];
+
+export function isByokProviderId(id: string): id is ByokProviderId {
+  return BYOK_PROVIDER_IDS.some((known) => known === id);
+}
+
+/**
+ * A provider id anywhere on the wire (run options, session rows, secrets):
+ * a BYOK id or a configured endpoint id (issue #806). `string & {}` keeps
+ * literal completion for the BYOK ids while admitting endpoint ids, which
+ * are user-chosen slugs validated at the boundary.
+ */
+export type ProviderId = ByokProviderId | (string & {});
diff --git a/packages/grida-ai-agent/src/protocol/run.ts b/packages/grida-ai-agent/src/protocol/run.ts
index 8cd2be37c..ae9c837ac 100644
--- a/packages/grida-ai-agent/src/protocol/run.ts
+++ b/packages/grida-ai-agent/src/protocol/run.ts
@@ -4,7 +4,7 @@
  */
 
 import type { models, ModelTier } from "@grida/ai-models";
-import type { ByokProviderId } from "./provider-ids";
+import type { ProviderId } from "./provider-ids";
 import type { SkillId } from "./skills";
 import type { AgentMode } from "./mode";
 
@@ -24,7 +24,13 @@ export const AGENT_SESSION_AGENT = "grida" as const;
  */
 export const GRIDA_SESSION_SSE_EVENT = "grida-session" as const;
 
-export type AgentModelId = models.text.CatalogId;
+/**
+ * A runnable model id: a catalog id, or a user-registered model id served
+ * by a configured endpoint provider (issue #806 — e.g. `llama3.1:8b` on
+ * Ollama). Open on the wire; the run-input boundary validates against
+ * catalog ∪ registered ids, so an arbitrary string still 400s.
+ */
+export type AgentModelId = models.text.CatalogId | (string & {});
 
 export type AgentRunMessagePart = {
   type: string;
@@ -64,7 +70,11 @@ export type AgentRunOptions = {
    * the one `tier` would resolve to.
    */
   model_id?: AgentModelId;
-  provider_id?: ByokProviderId;
+  /**
+   * Explicit provider pick (issue #806). Validated server-side against
+   * the allowed set; an unknown id 400s.
+   */
+  provider_id?: ProviderId;
   feature?: string;
   workspace_id?: string;
   skills?: readonly SkillId[];
diff --git a/packages/grida-ai-agent/src/providers/byok.ts b/packages/grida-ai-agent/src/providers/byok.ts
index 1e60ce24d..bdb1a9bc9 100644
--- a/packages/grida-ai-agent/src/providers/byok.ts
+++ b/packages/grida-ai-agent/src/providers/byok.ts
@@ -25,6 +25,10 @@ export function makeOpenRouterFactory(apiKey: string): ModelFactory {
     baseURL: "https://openrouter.ai/api/v1",
     apiKey,
     headers: OPENROUTER_HEADERS,
+    // OpenAI-compat streams omit the usage chunk unless
+    // `stream_options.include_usage` is requested — without it every
+    // streamed run records zero tokens (no rollups, no context meter).
+    includeUsage: true,
   });
   // Both OpenRouter and the catalog use Vercel-style `creator/model`
   // ids, so an explicit pick hands straight through; otherwise fall
@@ -36,3 +40,31 @@ export function makeVercelFactory(apiKey: string): ModelFactory {
   const provider = createGateway({ apiKey });
   return (tier, modelId) => provider(modelId ?? MODEL_BY_TIER[tier]);
 }
+
+/**
+ * Factory for a user-configured OpenAI-compatible endpoint (issue #806) —
+ * Ollama, LiteLLM, vLLM, any self-hosted gateway. The "no signup" trick
+ * is that `api_key` is OPTIONAL: when absent (Ollama) no Authorization
+ * header is sent, and that is not an error.
+ *
+ * Tier mapping: EVERY tier resolves to the endpoint's default model. The
+ * catalog's tier→id table (`anthropic/claude-…`) is meaningless to a
+ * local endpoint, and background subagents (titler, compactor) resolve
+ * tiers too — they must land on a model this endpoint actually serves.
+ */
+export function makeEndpointFactory(config: {
+  id: string;
+  base_url: string;
+  api_key?: string;
+  default_model_id: string;
+}): ModelFactory {
+  // `includeUsage` opts in to the streaming usage chunk, as the OpenRouter
+  // factory does; without it streamed runs record zero tokens.
+  const endpoint = createOpenAICompatible({
+    name: config.id,
+    baseURL: config.base_url,
+    apiKey: config.api_key,
+    includeUsage: true,
+  });
+  return (_tier, requested) => endpoint(requested ?? config.default_model_id);
+}
diff --git a/packages/grida-ai-agent/src/providers/endpoints.live.test.ts b/packages/grida-ai-agent/src/providers/endpoints.live.test.ts
new file mode 100644
index 000000000..a8a3ba74d
--- /dev/null
+++ b/packages/grida-ai-agent/src/providers/endpoints.live.test.ts
@@ -0,0 +1,325 @@
+/**
+ * LIVE end-to-end — endpoint providers against a REAL local Ollama
+ * (issue #806). The durability bar for "no signup, no key": a host with
+ * NO BYOK secret and one configured endpoint must run the agent end to
+ * end — provider resolution, the run loop, session persistence, the
+ * background titler, and a real server-side tool execution.
+ *
+ * Gated + excluded from CI (needs a local `ollama serve` + a pulled
+ * model). Run explicitly:
+ *
+ *   GRIDA_LIVE_OLLAMA=1 \
+ *     pnpm --filter @grida/agent vitest run src/providers/endpoints.live.test.ts
+ *
+ * Env knobs:
+ *   GRIDA_LIVE_OLLAMA=1       — required, opts in.
+ *   GRIDA_LIVE_OLLAMA_MODEL   — model id to use (default: first from /api/tags).
+ *   GRIDA_LIVE_OLLAMA_URL     — base URL (default: the Ollama preset).
+ */
+
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest";
+import { Hono } from "hono";
+import { AuthStore } from "../auth/file";
+import { SecretsStore } from "../secrets";
+import { WorkspaceRegistry } from "../workspaces";
+import { openSessionsDb } from "../session/db";
+import { SessionsStore } from "../session/store";
+import { OLLAMA_ENDPOINT_PRESET } from "../protocol/endpoints";
+import { session_title } from "../session/title";
+import { AgentRuntime } from "../runtime";
+import { StreamRegistry } from "../runtime/stream-registry";
+import { registerAgentRoutes } from "../http/routes/agent";
+import { sessionIdFromSse } from "../testing/sse";
+import { EndpointProvidersStore } from "./endpoints";
+import { probeEndpointModels } from "./probe";
+import { resolveProvider } from ".";
+
+const LIVE = process.env.GRIDA_LIVE_OLLAMA === "1";
+const BASE_URL =
+  process.env.GRIDA_LIVE_OLLAMA_URL ?? OLLAMA_ENDPOINT_PRESET.base_url;
+const TIMEOUT_MS = 300_000;
+
+const liveDescribe = LIVE ? describe : describe.skip;
+
+/** The model to test with — env override, else the first installed model. */
+async function detectModelId(): Promise<string> {
+  const pinned = process.env.GRIDA_LIVE_OLLAMA_MODEL;
+  if (pinned) return pinned;
+  const res = await fetch(`${new URL(BASE_URL).origin}/api/tags`);
+  // Surface an HTTP failure as such, not as a JSON parse error below.
+  if (!res.ok) throw new Error(`Ollama /api/tags returned HTTP ${res.status}`);
+  const data = (await res.json()) as { models?: Array<{ name: string }> };
+  const first = data.models?.[0]?.name;
+  if (!first) throw new Error("no Ollama models installed — `ollama pull` one");
+  return first;
+}
+
+// Stitch the assistant's reply together from a drained SSE body: every
+// `text-delta` chunk's `delta`, in stream order.
+function assistantTextFromSse(body: string): string {
+  const deltas: string[] = [];
+  // Frame separators ("\n\n") are just empty lines, so one flat line scan
+  // visits the same `data:` lines a frame-by-frame walk would.
+  for (const line of body.split("\n")) {
+    if (!line.startsWith("data:")) continue;
+    const payload = line.slice("data:".length).trim();
+    if (payload === "" || payload === "[DONE]") continue;
+    try {
+      const chunk = JSON.parse(payload) as { type?: string; delta?: string };
+      if (chunk.type !== "text-delta") continue;
+      if (typeof chunk.delta === "string") deltas.push(chunk.delta);
+    } catch {
+      /* not a JSON UIMessageChunk frame (e.g. the session frame) */
+    }
+  }
+  return deltas.join("");
+}
+
+type Host = {
+  app: Hono;
+  runtime: AgentRuntime;
+  store: SessionsStore;
+  workspaces: WorkspaceRegistry;
+};
+
+// Assemble a complete agent host — stores, runtime, and the agent
+// routes — with every store rooted at `baseDir`.
+function buildHost(baseDir: string): Host {
+  const secrets = new SecretsStore(new AuthStore(baseDir));
+  const endpoints = new EndpointProvidersStore(baseDir);
+  const workspaces = new WorkspaceRegistry(baseDir);
+  const store = new SessionsStore(openSessionsDb({ user_data_path: baseDir }));
+  const runtime = new AgentRuntime({
+    secrets,
+    endpoints,
+    workspace_registry: workspaces,
+    sessions_store: store,
+    streams: new StreamRegistry(),
+    drain_cooldown_ms: 20,
+  });
+  const app = new Hono();
+  registerAgentRoutes(app, runtime);
+  return { app, runtime, store, workspaces };
+}
+
+// POST one turn to `/agent/run`, drain the SSE stream, and pull out the
+// HTTP status, the assistant's text, and the session id.
+async function runTurn(
+  host: Host,
+  body: Record<string, unknown>
+): Promise<{ status: number; text: string; session_id: string }> {
+  const response = await host.app.request("/agent/run", {
+    method: "POST",
+    headers: { "content-type": "application/json" },
+    body: JSON.stringify(body),
+  });
+  const { status } = response;
+  const drained = await response.text();
+  const text = assistantTextFromSse(drained);
+  return { status, text, session_id: sessionIdFromSse(drained) };
+}
+
+let MODEL_ID = "";
+
+liveDescribe("LIVE — Ollama endpoint provider, no key (issue #806)", () => {
+  let baseDir: string;
+  let host: Host;
+
+  beforeAll(async () => {
+    MODEL_ID = await detectModelId();
+    console.log(`[live-ollama] model=${MODEL_ID} base_url=${BASE_URL}`);
+  });
+
+  beforeEach(async () => {
+    baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "grida-ollama-live-"));
+    // NO BYOK key is ever set — the whole point. Just the endpoint config.
+    const endpoints = new EndpointProvidersStore(baseDir);
+    await endpoints.set({
+      ...OLLAMA_ENDPOINT_PRESET,
+      base_url: BASE_URL,
+      models: [{ id: MODEL_ID, contextWindow: 32_768, tool_call: true }],
+    });
+    host = buildHost(baseDir);
+  });
+
+  afterEach(async () => {
+    // Conditional: a beforeEach failure leaves `host`/`baseDir` unset —
+    // teardown must surface the setup error, not mask it by throwing.
+    (host as Host | undefined)?.runtime.dispose();
+    (host as Host | undefined)?.store.close();
+    if (baseDir) await fs.rm(baseDir, { recursive: true, force: true });
+  });
+
+  it(
+    "resolves the endpoint provider with no secret configured",
+    async () => {
+      const endpoints = new EndpointProvidersStore(baseDir);
+      const secrets = new SecretsStore(new AuthStore(baseDir));
+      const provider = await resolveProvider({ secrets, endpoints });
+      expect(provider.provider_id).toBe("ollama");
+      expect(provider.kind).toBe("endpoint");
+    },
+    TIMEOUT_MS
+  );
+
+  it(
+    "probes the running Ollama and discovers the test model",
+    async () => {
+      const result = await probeEndpointModels(BASE_URL);
+      expect(result.ok).toBe(true);
+      if (!result.ok) return;
+      expect(result.source).toBe("ollama");
+      const found = result.models.find((m) => m.id === MODEL_ID);
+      expect(found).toBeDefined();
+      // The live model advertises tool support via /api/tags capabilities.
+      expect(found?.tool_call).toBe(true);
+      // Context window comes from /api/ps (loaded allocation) or
+      // /api/show (model max) — either way a real positive number.
+      expect(found?.contextWindow ?? 0).toBeGreaterThan(0);
+    },
+    TIMEOUT_MS
+  );
+
+  it(
+    "runs a keyless text turn end-to-end and persists the session",
+    async () => {
+      const turn = await runTurn(host, {
+        messages: [
+          {
+            role: "user",
+            content:
+              "Reply with exactly the word GRIDA_OK and nothing else. No punctuation.",
+          },
+        ],
+        model_id: MODEL_ID,
+      });
+      expect(turn.status).toBe(200);
+      expect(turn.session_id).toBeTruthy();
+      expect(turn.text).toContain("GRIDA_OK");
+
+      const session = await host.store.get(turn.session_id);
+      expect(session?.model?.provider_id).toBe("ollama");
+      expect(session?.model?.model_id).toBe(MODEL_ID);
+      // Usage was recorded off the real stream.
+      expect(session?.total_tokens ?? 0).toBeGreaterThan(0);
+
+      // The background titler rides the SAME endpoint factory (its `nano`
+      // tier must land on the local model). Poll up to ~60s for the rename.
+      let titled = false;
+      for (let attempt = 0; attempt < 60 && !titled; attempt++) {
+        await new Promise((r) => setTimeout(r, 1000));
+        const row = await host.store.get(turn.session_id);
+        titled = row != null && !session_title.isDefault(row.title);
+      }
+      expect(titled).toBe(true);
+    },
+    TIMEOUT_MS
+  );
+
+  it(
+    "second turn continues the same session (server-authoritative view)",
+    async () => {
+      const first = await runTurn(host, {
+        messages: [
+          {
+            role: "user",
+            content:
+              "My secret code word is ZUMBRA. Acknowledge with OK and nothing else.",
+          },
+        ],
+        model_id: MODEL_ID,
+      });
+      expect(first.status).toBe(200);
+      const second = await runTurn(host, {
+        session_id: first.session_id,
+        messages: [
+          {
+            role: "user",
+            content:
+              "Reply with exactly my secret code word from earlier and nothing else.",
+          },
+        ],
+        model_id: MODEL_ID,
+      });
+      expect(second.status).toBe(200);
+      expect(second.session_id).toBe(first.session_id);
+      expect(second.text.toUpperCase()).toContain("ZUMBRA");
+    },
+    TIMEOUT_MS
+  );
+
+  it(
+    "manual compaction summarizes via the endpoint model (thinking-safe cap)",
+    async () => {
+      const first = await runTurn(host, {
+        messages: [
+          {
+            role: "user",
+            content:
+              "We are naming a project. I propose the name AURELIA-9. Acknowledge briefly.",
+          },
+        ],
+        model_id: MODEL_ID,
+      });
+      expect(first.status).toBe(200);
+
+      const res = await host.runtime.compact(first.session_id);
+      const result = (await res.json()) as {
+        compacted: boolean;
+        reason?: string;
+        summary_message_id?: string;
+      };
+      // A thinking model with a too-tight output cap returns an EMPTY
+      // summary (`finish_reason: length` before any text) — `compacted`
+      // flips false ("summarizer-failed") or persists nothing useful.
+      expect(result.compacted).toBe(true);
+
+      const messages = await host.store.listVisibleMessages(first.session_id);
+      const summaryPart = messages
+        .flatMap((m) => m.parts)
+        .find((p) => p.type === "data-compaction");
+      const summary = (
+        summaryPart?.data as { data?: { summary?: string } } | null
+      )?.data?.summary;
+      expect(summary ?? "").toMatch(/AURELIA-9/i);
+    },
+    TIMEOUT_MS
+  );
+
+  it(
+    "executes a REAL server-side tool call (workspace fs write)",
+    async () => {
+      const wsRoot = await fs.mkdtemp(
+        path.join(os.tmpdir(), "grida-ollama-ws-")
+      );
+      try {
+        const ws = await host.workspaces.open(wsRoot);
+        const turn = await runTurn(host, {
+          workspace_id: ws.id,
+          model_id: MODEL_ID,
+          // `auto` so the local run needs no supervised approval round-trip.
+          mode: "auto",
+          messages: [
+            {
+              role: "user",
+              content:
+                "Use your file tools to create a file named hello.txt at the workspace root containing exactly: hello from ollama — then confirm.",
+            },
+          ],
+        });
+        expect(turn.status).toBe(200);
+        const written = await fs.readFile(
+          path.join(ws.root, "hello.txt"),
+          "utf8"
+        );
+        expect(written.toLowerCase()).toContain("hello from ollama");
+      } finally {
+        await fs.rm(wsRoot, { recursive: true, force: true });
+      }
+    },
+    TIMEOUT_MS
+  );
+});
diff --git a/packages/grida-ai-agent/src/providers/endpoints.test.ts b/packages/grida-ai-agent/src/providers/endpoints.test.ts
new file mode 100644
index 000000000..4d8896352
--- /dev/null
+++ b/packages/grida-ai-agent/src/providers/endpoints.test.ts
@@ -0,0 +1,433 @@
+/**
+ * Endpoint provider layer (issue #806): config validation, the
+ * file-backed store, and the `/providers/endpoints/*` + extended
+ * `/secrets/*` routes. Runs against a tmp-dir store and a bare Hono
+ * app — no model, no network.
+ */
+import fs from "node:fs/promises";
+import os from "node:os";
+import path from "node:path";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+import { Hono } from "hono";
+import {
+  OLLAMA_ENDPOINT_PRESET,
+  isValidEndpointProviderId,
+  mergeProbedModels,
+  resolveEndpointModel,
+  validateEndpointProviderConfig,
+  type EndpointProviderConfig,
+} from "../protocol/endpoints";
+import { AuthStore } from "../auth/file";
+import { SecretsStore } from "../secrets";
+import { registerProvidersRoutes } from "../http/routes/providers";
+import { registerSecretsRoutes } from "../http/routes/secrets";
+import { EndpointProvidersStore } from "./endpoints";
+
+const OLLAMA: EndpointProviderConfig = {
+  ...OLLAMA_ENDPOINT_PRESET,
+  models: [{ id: "llama3.1:8b" }, { id: "qwen3:32b", tool_call: false }],
+};
+
+describe("validateEndpointProviderConfig", () => {
+  it("accepts the Ollama preset shape", () => {
+    const result = validateEndpointProviderConfig(OLLAMA);
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.config.id).toBe("ollama");
+    expect(result.config.base_url).toBe("http://localhost:11434/v1");
+    expect(result.config.models.length).toBe(2);
+  });
+
+  it("rejects BYOK-colliding and malformed ids", () => {
+    expect(isValidEndpointProviderId("openrouter")).toBe(false);
+    expect(isValidEndpointProviderId("vercel")).toBe(false);
+    expect(isValidEndpointProviderId("Ollama")).toBe(false);
+    expect(isValidEndpointProviderId("")).toBe(false);
+    expect(isValidEndpointProviderId("ollama")).toBe(true);
+    expect(isValidEndpointProviderId("my-gateway_2")).toBe(true);
+  });
+
+  it("rejects non-http(s) base URLs", () => {
+    for (const base_url of ["file:///etc", "ftp://x", "not a url", "", "  "]) {
+      const result = validateEndpointProviderConfig({ ...OLLAMA, base_url });
+      expect(result.ok).toBe(false);
+    }
+  });
+
+  it("trims whitespace padding off base_url before persisting", () => {
+    const result = validateEndpointProviderConfig({
+      ...OLLAMA,
+      base_url: "  http://localhost:11434/v1\n",
+    });
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.config.base_url).toBe("http://localhost:11434/v1");
+  });
+
+  it("rejects duplicate model ids and a dangling default_model_id", () => {
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        models: [{ id: "m" }, { id: "m" }],
+      }).ok
+    ).toBe(false);
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        default_model_id: "not-registered",
+      }).ok
+    ).toBe(false);
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        default_model_id: "qwen3:32b",
+      }).ok
+    ).toBe(true);
+  });
+
+  it("drops unknown fields and never accepts a cost card from input", () => {
+    const result = validateEndpointProviderConfig({
+      ...OLLAMA,
+      models: [{ id: "m", cost: { input: 1, output: 2 }, evil: true }],
+    });
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.config.models[0]).not.toHaveProperty("cost");
+    expect(result.config.models[0]).not.toHaveProperty("evil");
+  });
+
+  it("accepts overrides and resolves them over detected values", () => {
+    const result = validateEndpointProviderConfig({
+      ...OLLAMA,
+      models: [
+        {
+          id: "m",
+          tool_call: true,
+          contextWindow: 262_144,
+          overrides: { contextWindow: 32_768, junk: true },
+        },
+      ],
+    });
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    const entry = result.config.models[0];
+    // Stored shape keeps both halves; unknown override keys are dropped.
+    expect(entry.contextWindow).toBe(262_144);
+    expect(entry.overrides).toEqual({ contextWindow: 32_768 });
+    // Resolution: override wins, untouched fields fall through.
+    const resolved = resolveEndpointModel(entry);
+    expect(resolved.contextWindow).toBe(32_768);
+    expect(resolved.tool_call).toBe(true);
+    expect(resolved).not.toHaveProperty("overrides");
+  });
+
+  it("rejects malformed overrides", () => {
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        models: [{ id: "m", overrides: { contextWindow: -5 } }],
+      }).ok
+    ).toBe(false);
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        models: [{ id: "m", overrides: "nope" }],
+      }).ok
+    ).toBe(false);
+  });
+
+  it("rejects out-of-range numeric limits", () => {
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        models: [{ id: "m", contextWindow: -1 }],
+      }).ok
+    ).toBe(false);
+    expect(
+      validateEndpointProviderConfig({
+        ...OLLAMA,
+        models: [{ id: "m", contextWindow: 1.5 }],
+      }).ok
+    ).toBe(false);
+  });
+});
+
+describe("mergeProbedModels — the detection-owned merge contract", () => {
+  it("probe overwrites detected fields, never overrides; silent probe keeps prior detection", () => {
+    const result = mergeProbedModels(
+      [
+        {
+          id: "gemma4:31b-mlx",
+          tool_call: false, // stale detection
+          contextWindow: 8_192,
+          overrides: { contextWindow: 32_768 },
+        },
+        { id: "unprobed:7b", tool_call: true }, // not in probe result
+      ],
+      [
+        // tool_call reported, contextWindow silent (older Ollama): the
+        // silent field keeps the previous detection.
+        { id: "gemma4:31b-mlx", tool_call: true },
+      ]
+    );
+    expect(result.updated).toBe(1);
+    expect(result.discovered).toBe(0);
+    expect(result.models[0]).toEqual({
+      id: "gemma4:31b-mlx",
+      tool_call: true,
+      contextWindow: 8_192,
+      overrides: { contextWindow: 32_768 }, // untouched, always
+    });
+    expect(result.models[1]).toEqual({ id: "unprobed:7b", tool_call: true });
+  });
+
+  it("appends newly discovered models and reports no-op merges", () => {
+    const result = mergeProbedModels(
+      [{ id: "known:8b", tool_call: true }],
+      [
+        { id: "known:8b", tool_call: true }, // unchanged
+        { id: "new:31b", tool_call: true, contextWindow: 262_144 },
+      ]
+    );
+    expect(result.updated).toBe(0);
+    expect(result.discovered).toBe(1);
+    expect(result.models.map((m) => m.id)).toEqual(["known:8b", "new:31b"]);
+    expect(result.models[1].contextWindow).toBe(262_144);
+
+    const noop = mergeProbedModels([{ id: "known:8b", tool_call: true }], []);
+    expect(noop.updated).toBe(0);
+    expect(noop.discovered).toBe(0);
+  });
+});
+
+describe("EndpointProvidersStore", () => {
+  let baseDir: string;
+  let store: EndpointProvidersStore;
+
+  beforeEach(async () => {
+    baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "grida-endpoints-"));
+    store = new EndpointProvidersStore(baseDir);
+  });
+
+  afterEach(async () => {
+    await fs.rm(baseDir, { recursive: true, force: true });
+  });
+
+  it("persists round-trip and survives a fresh store instance", async () => {
+    await store.set(OLLAMA);
+    const fresh = new EndpointProvidersStore(baseDir);
+    const list = await fresh.list();
+    expect(list.length).toBe(1);
+    expect(list[0].id).toBe("ollama");
+    expect(await fresh.get("ollama")).not.toBeNull();
+    expect(await fresh.registeredModels()).toHaveLength(2);
+  });
+
+  it("set replaces the entry with the same id", async () => {
+    await store.set(OLLAMA);
+    await store.set({ ...OLLAMA, models: [{ id: "only-one" }] });
+    expect(await store.registeredModels()).toHaveLength(1);
+  });
+
+  it("registeredModels applies overrides — every registry consumer sees effective values", async () => {
+    await store.set({
+      ...OLLAMA,
+      models: [
+        {
+          id: "capped:31b",
+          contextWindow: 262_144,
+          overrides: { contextWindow: 32_768 },
+        },
+      ],
+    });
+    const models = await store.registeredModels();
+    expect(models[0].contextWindow).toBe(32_768);
+  });
+
+  it("delete is idempotent", async () => {
+    await store.set(OLLAMA);
+    await store.delete("ollama");
+    await store.delete("ollama");
+    expect(await store.list()).toHaveLength(0);
+  });
+
+  it("rejects an invalid config thrown at the store layer", async () => {
+    await expect(
+      store.set({ ...OLLAMA, base_url: "file:///etc" })
+    ).rejects.toThrow(/invalid config/);
+  });
+
+  it("concurrent first reads share one load — no empty-cache window", async () => {
+    await fs.writeFile(
+      path.join(baseDir, "endpoints.json"),
+      JSON.stringify([OLLAMA]),
+      "utf8"
+    );
+    const fresh = new EndpointProvidersStore(baseDir);
+    const [list, entry, models] = await Promise.all([
+      fresh.list(),
+      fresh.get("ollama"),
+      fresh.registeredModels(),
+    ]);
+    expect(list).toHaveLength(1);
+    expect(entry).not.toBeNull();
+    expect(models).toHaveLength(2);
+  });
+
+  it("concurrent writes serialize — neither overwrites the other", async () => {
+    const other: EndpointProviderConfig = {
+      id: "litellm",
+      base_url: "http://localhost:4000/v1",
+      models: [{ id: "m" }],
+    };
+    await Promise.all([store.set(OLLAMA), store.set(other)]);
+    expect((await store.list()).map((e) => e.id).sort()).toEqual([
+      "litellm",
+      "ollama",
+    ]);
+    // The file agrees — a stale-snapshot persist would have dropped one.
+    const fresh = new EndpointProvidersStore(baseDir);
+    expect(await fresh.list()).toHaveLength(2);
+  });
+
+  it("drops invalid entries on load instead of failing", async () => {
+    await fs.writeFile(
+      path.join(baseDir, "endpoints.json"),
+      JSON.stringify([OLLAMA, { id: "broken" }, "junk"]),
+      "utf8"
+    );
+    expect((await store.list()).map((e) => e.id)).toEqual(["ollama"]);
+  });
+});
+
+describe("HTTP wire — /providers/endpoints/* and endpoint-id secrets", () => {
+  let baseDir: string;
+  let app: Hono;
+  let endpoints: EndpointProvidersStore;
+  let secrets: SecretsStore;
+
+  beforeEach(async () => {
+    baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "grida-providers-rt-"));
+    endpoints = new EndpointProvidersStore(baseDir);
+    secrets = new SecretsStore(new AuthStore(baseDir));
+    app = new Hono();
+    registerProvidersRoutes(app, { endpoints, secrets });
+    registerSecretsRoutes(app, { store: secrets, endpoints });
+  });
+
+  afterEach(async () => {
+    await fs.rm(baseDir, { recursive: true, force: true });
+  });
+
+  const post = (route: string, body?: unknown) =>
+    app.request(route, {
+      method: "POST",
+      headers: { "content-type": "application/json" },
+      body: body === undefined ? undefined : JSON.stringify(body),
+    });
+
+  it("info reports the config file path", async () => {
+    const res = await post("/providers/endpoints/info");
+    expect(res.status).toBe(200);
+    const { path: configPath } = (await res.json()) as { path: string };
+    expect(configPath.endsWith("endpoints.json")).toBe(true);
+  });
+
+  it("set → list → delete round-trips", async () => {
+    const set = await post("/providers/endpoints/set", { config: OLLAMA });
+    expect(set.status).toBe(200);
+
+    const list = await post("/providers/endpoints/list");
+    expect(list.status).toBe(200);
+    const configs = (await list.json()) as EndpointProviderConfig[];
+    expect(configs.map((c) => c.id)).toEqual(["ollama"]);
+
+    const del = await post("/providers/endpoints/delete", { id: "ollama" });
+    expect(del.status).toBe(200);
+    expect(await (await post("/providers/endpoints/list")).json()).toEqual([]);
+  });
+
+  it("probe route returns parsed models, 502s an unreachable endpoint", async () => {
+    const probeApp = new Hono();
+    registerProvidersRoutes(probeApp, {
+      endpoints,
+      probe: async (baseUrl: string) =>
+        baseUrl.includes("11434")
+          ? {
+              ok: true as const,
+              source: "ollama" as const,
+              models: [{ id: "gemma4:31b-mlx", tool_call: true }],
+            }
+          : { ok: false as const, error: "no model listing at this endpoint" },
+    });
+    const probePost = (body: unknown) =>
+      probeApp.request("/providers/endpoints/probe", {
+        method: "POST",
+        headers: { "content-type": "application/json" },
+        body: JSON.stringify(body),
+      });
+
+    const ok = await probePost({ base_url: "http://localhost:11434/v1" });
+    expect(ok.status).toBe(200);
+    expect(await ok.json()).toEqual({
+      source: "ollama",
+      models: [{ id: "gemma4:31b-mlx", tool_call: true }],
+    });
+
+    const down = await probePost({ base_url: "http://localhost:9/v1" });
+    expect(down.status).toBe(502);
+
+    const bad = await probePost({});
+    expect(bad.status).toBe(400);
+
+    // Malformed input is the caller's fault — 400, not a 502 "outage".
+    const malformed = await probePost({ base_url: "not a url" });
+    expect(malformed.status).toBe(400);
+    const wrongScheme = await probePost({ base_url: "ftp://host/v1" });
+    expect(wrongScheme.status).toBe(400);
+  });
+
+  it("400s an invalid config with the validator's message", async () => {
+    const res = await post("/providers/endpoints/set", {
+      config: { ...OLLAMA, id: "openrouter" },
+    });
+    expect(res.status).toBe(400);
+    const body = (await res.json()) as { error: string };
+    expect(body.error).toMatch(/id/);
+  });
+
+  it("deleting an endpoint deletes its stored key — no orphaned credential", async () => {
+    await post("/providers/endpoints/set", { config: OLLAMA });
+    await post("/secrets/set", { provider_id: "ollama", key: "gateway-key" });
+    expect(await secrets.has("ollama")).toBe(true);
+
+    await post("/providers/endpoints/delete", { id: "ollama" });
+    // The key went with the endpoint: nothing stale in auth.json, and a
+    // re-created "ollama" endpoint can't silently reuse the old credential.
+    expect(await secrets.has("ollama")).toBe(false);
+  });
+
+  it("secrets routes accept a configured endpoint id, reject unknown ids", async () => {
+    // Unknown until configured.
+    expect(
+      (await post("/secrets/set", { provider_id: "ollama", key: "k" })).status
+    ).toBe(400);
+
+    await post("/providers/endpoints/set", { config: OLLAMA });
+
+    expect(
+      (await post("/secrets/set", { provider_id: "ollama", key: "k" })).status
+    ).toBe(200);
+    const has = await post("/secrets/has", { provider_id: "ollama" });
+    expect(((await has.json()) as { has: boolean }).has).toBe(true);
+
+    // BYOK ids still work; junk still 400s.
+    expect(
+      (await post("/secrets/set", { provider_id: "openrouter", key: "k" }))
+        .status
+    ).toBe(200);
+    expect((await post("/secrets/has", { provider_id: "nope" })).status).toBe(
+      400
+    );
+  });
+});
diff --git a/packages/grida-ai-agent/src/providers/endpoints.ts b/packages/grida-ai-agent/src/providers/endpoints.ts
new file mode 100644
index 000000000..378c2b94f
--- /dev/null
+++ b/packages/grida-ai-agent/src/providers/endpoints.ts
@@ -0,0 +1,163 @@
+/**
+ * GRIDA-SEC-004 — endpoint provider config store (issue #806).
+ *
+ * Persists user-configured OpenAI-compatible endpoints (Ollama, self-
+ * hosted gateways) at `${userData}/endpoints.json` with the same
+ * atomic-write pattern as `workspaces.json` / `recent.json`.
+ *
+ * Deliberately a SIBLING of `SecretsStore`, not part of it: an endpoint
+ * config (base URL + registered models) is plain readable config the
+ * renderer may list back, while secrets are write/presence/delete-only.
+ * If a gateway needs an API key, that key goes through the secrets
+ * surface under the endpoint's id — it never lands in this file.
+ *
+ * Every load re-validates entries through the protocol validator, so a
+ * hand-edited or corrupted file degrades to "entry dropped", never to
+ * an invalid config reaching the provider factory.
+ */
+
+import fs from "node:fs/promises";
+import path from "node:path";
+import {
+  resolveEndpointModels,
+  validateEndpointProviderConfig,
+  type EndpointModelSpec,
+  type EndpointProviderConfig,
+} from "../protocol/endpoints";
+import { isByokProviderId } from "../protocol/provider-ids";
+import { atomicWrite } from "../storage/atomic-write";
+
+const FILE_NAME = "endpoints.json";
+const MAX_ENTRIES = 16;
+
+/**
+ * Membership test for the closed provider-id namespace: a hardcoded BYOK
+ * id, or the id of an endpoint currently held by `endpoints`. Without a
+ * store, only BYOK ids are known. Intended as the one shared gate for
+ * boundaries taking a provider id — unknown ids are meant to 400 there.
+ */
+export async function isKnownProviderId(
+  id: string,
+  endpoints?: EndpointProvidersStore
+): Promise<boolean> {
+  // BYOK ids short-circuit; the store is only consulted for anything else.
+  return isByokProviderId(id) || (await endpoints?.get(id)) != null;
+}
+
+export class EndpointProvidersStore {
+  private entries: EndpointProviderConfig[] = [];
+  private load_promise: Promise<void> | null = null;
+  private write_chain: Promise<void> = Promise.resolve();
+  private readonly file_path: string;
+
+  constructor(userDataPath: string) {
+    this.file_path = path.join(userDataPath, FILE_NAME);
+  }
+
+  /** Absolute path of the backing JSON — surfaced so the settings UI can
+   *  point developers at the hand-editable file (overrides live there). */
+  get filePath(): string {
+    return this.file_path;
+  }
+
+  /** One shared load: concurrent first calls await the SAME read instead
+   *  of a second caller observing the default empty cache mid-load. */
+  private ensureLoaded(): Promise<void> {
+    return (this.load_promise ??= this.loadOnce());
+  }
+
+  /** Load the file once; invalid, duplicate-id and overflow entries drop. */
+  private async loadOnce(): Promise<void> {
+    try {
+      const raw = await fs.readFile(this.file_path, "utf8");
+      const parsed = JSON.parse(raw);
+      if (Array.isArray(parsed)) {
+        const valid: EndpointProviderConfig[] = [];
+        for (const entry of parsed) {
+          const result = validateEndpointProviderConfig(entry);
+          if (result.ok && !valid.some((e) => e.id === result.config.id)) {
+            valid.push(result.config);
+          }
+        }
+        this.entries = valid.slice(0, MAX_ENTRIES);
+      }
+    } catch {
+      // Missing or corrupt file → empty. Endpoint config is cheap to
+      // re-enter; a hand-edit-the-JSON-to-recover UX would be hostile.
+      this.entries = [];
+    }
+  }
+
+  /** Serialize mutations: each read-modify-persist starts only after the
+   *  previous one settles, so concurrent `set()`/`delete()` calls can't
+   *  race on stale snapshots. A rejected `fn` never poisons the chain. */
+  private withWriteLock<T>(fn: () => Promise<T>): Promise<T> {
+    const run = this.write_chain.then(fn);
+    const settled = (): void => undefined;
+    this.write_chain = run.then(settled, settled);
+    return run;
+  }
+
+  /** Write `next` to disk via `atomicWrite`; callers adopt it on success. */
+  private async persist(next: EndpointProviderConfig[]): Promise<void> {
+    await atomicWrite(this.file_path, JSON.stringify(next, null, 2));
+  }
+
+  /** All configured endpoints, as a fresh array (entries are not cloned). */
+  async list(): Promise<EndpointProviderConfig[]> {
+    await this.ensureLoaded();
+    return [...this.entries];
+  }
+
+  /** The endpoint with this id, or `null` when none is configured. */
+  async get(id: string): Promise<EndpointProviderConfig | null> {
+    await this.ensureLoaded();
+    return this.entries.find((e) => e.id === id) ?? null;
+  }
+
+  /**
+   * Insert or replace the config with the same id; re-validates so a
+   * non-route caller can't persist junk. Throws on an invalid config or
+   * past MAX_ENTRIES. The cache changes only after the write succeeds.
+   */
+  async set(config: EndpointProviderConfig): Promise<void> {
+    const result = validateEndpointProviderConfig(config);
+    if (!result.ok) {
+      throw new Error(`[agent-host-endpoints] invalid config: ${result.error}`);
+    }
+    await this.withWriteLock(async () => {
+      await this.ensureLoaded();
+      const next = this.entries.filter((e) => e.id !== result.config.id);
+      if (next.length >= MAX_ENTRIES) {
+        throw new Error(
+          `[agent-host-endpoints] too many endpoint providers (max ${MAX_ENTRIES})`
+        );
+      }
+      next.push(result.config);
+      await this.persist(next);
+      this.entries = next;
+    });
+  }
+
+  /** Remove the endpoint with this id; a no-op (no write) when absent. */
+  async delete(id: string): Promise<void> {
+    await this.withWriteLock(async () => {
+      await this.ensureLoaded();
+      const next = this.entries.filter((e) => e.id !== id);
+      if (next.length === this.entries.length) return;
+      await this.persist(next);
+      this.entries = next;
+    });
+  }
+
+  /**
+   * Every model registered across all endpoints, OVERRIDE-RESOLVED — the
+   * custom half of the seam `models.text.registry.resolve(id, THIS)`.
+   * Consumers (run-input model gate, compaction limits, multimodal/tool_call
+   * capability checks) must see effective values, never raw detected fields.
+   */
+  async registeredModels(): Promise<EndpointModelSpec[]> {
+    await this.ensureLoaded();
+    return this.entries.flatMap((e) => resolveEndpointModels(e));
+  }
+}
diff --git a/packages/grida-ai-agent/src/providers/index.test.ts b/packages/grida-ai-agent/src/providers/index.test.ts
index a461db3ae..8835249e3 100644
--- a/packages/grida-ai-agent/src/providers/index.test.ts
+++ b/packages/grida-ai-agent/src/providers/index.test.ts
@@ -1,19 +1,37 @@
 import { describe, expect, it } from "vitest";
 import type { SecretsStore } from "../secrets";
+import type { EndpointProviderConfig } from "../protocol/endpoints";
+import type { EndpointProvidersStore } from "./endpoints";
 import {
   MODEL_BY_TIER,
   ProviderUnavailableError,
   resolveProvider,
 } from "./index";
 
-function deps(keys: Record<string, string | null> = {}) {
+function deps(
+  keys: Record<string, string | null> = {},
+  endpoints?: EndpointProviderConfig[]
+) {
   return {
     secrets: {
       _getKey: async (providerId: string) => keys[providerId] ?? null,
     } as SecretsStore,
+    endpoints: endpoints
+      ? ({
+          list: async () => endpoints,
+          get: async (id: string) => endpoints.find((e) => e.id === id) ?? null,
+        } as EndpointProvidersStore)
+      : undefined,
   };
 }
 
+const OLLAMA: EndpointProviderConfig = {
+  id: "ollama",
+  label: "Ollama",
+  base_url: "http://localhost:11434/v1",
+  models: [{ id: "llama3.1:8b" }, { id: "qwen3:32b" }],
+};
+
 describe("resolveProvider", () => {
   it("prefers OpenRouter over Vercel when both BYOK keys exist", async () => {
     const provider = await resolveProvider(
@@ -64,3 +82,68 @@ describe("resolveProvider", () => {
     expect(picked.modelId).toBe("google/gemini-3.5-flash");
   });
 });
+
+describe("resolveProvider — endpoint providers (issue #806)", () => {
+  it("resolves a configured endpoint with NO key (the no-signup path)", async () => {
+    const provider = await resolveProvider(deps({}, [OLLAMA]));
+    expect(provider.provider_id).toBe("ollama");
+    expect(provider.kind).toBe("endpoint");
+  });
+
+  it("BYOK keys take precedence over configured endpoints", async () => {
+    const provider = await resolveProvider(
+      deps({ openrouter: "sk-or" }, [OLLAMA])
+    );
+    expect(provider.provider_id).toBe("openrouter");
+  });
+
+  it("an explicit endpoint pick skips BYOK precedence", async () => {
+    const provider = await resolveProvider(
+      deps({ openrouter: "sk-or" }, [OLLAMA]),
+      { explicit: "ollama" }
+    );
+    expect(provider.provider_id).toBe("ollama");
+    expect(provider.kind).toBe("endpoint");
+  });
+
+  it("an endpoint with no registered models is not resolvable", async () => {
+    const empty = { ...OLLAMA, models: [] };
+    await expect(resolveProvider(deps({}, [empty]))).rejects.toBeInstanceOf(
+      ProviderUnavailableError
+    );
+    await expect(
+      resolveProvider(deps({}, [empty]), { explicit: "ollama" })
+    ).rejects.toMatchObject({ provider_id: "ollama" });
+  });
+
+  it("an unknown explicit provider id throws with the picked id", async () => {
+    await expect(
+      resolveProvider(deps({}, [OLLAMA]), { explicit: "nope" })
+    ).rejects.toMatchObject({ provider_id: "nope" });
+  });
+
+  it("every tier maps to the endpoint's default model; explicit ids pass through", async () => {
+    const provider = await resolveProvider(deps({}, [OLLAMA]));
+    // OLLAMA sets no default_model_id, so the default is models[0]
+    // ("llama3.1:8b"). Tier requests (e.g. `nano` from the titler /
+    // compactor) must land on a served model, never a catalog tier id.
+    for (const tier of ["nano", "mini", "pro", "max"] as const) {
+      expect(
+        (provider.model_factory(tier) as { modelId: string }).modelId
+      ).toBe("llama3.1:8b");
+    }
+    expect(
+      (provider.model_factory("pro", "qwen3:32b") as { modelId: string })
+        .modelId
+    ).toBe("qwen3:32b");
+  });
+
+  it("honors an explicit default_model_id", async () => {
+    const provider = await resolveProvider(
+      deps({}, [{ ...OLLAMA, default_model_id: "qwen3:32b" }])
+    );
+    expect((provider.model_factory("pro") as { modelId: string }).modelId).toBe(
+      "qwen3:32b"
+    );
+  });
+});
diff --git a/packages/grida-ai-agent/src/providers/index.ts b/packages/grida-ai-agent/src/providers/index.ts
index ade93d5fe..104bf38bc 100644
--- a/packages/grida-ai-agent/src/providers/index.ts
+++ b/packages/grida-ai-agent/src/providers/index.ts
@@ -1,5 +1,5 @@
 /**
- * GRIDA-SEC-004 — BYOK provider resolver (in-package providers layer).
+ * GRIDA-SEC-004 — provider resolver (in-package providers layer).
  *
  * Picks the active provider for an agent run and returns a runnable
  * `ModelFactory`. Resolution is a node-only, in-process concern: it reads
@@ -7,9 +7,18 @@
  * secrets threat model) and never calls the model itself — it only builds
  * the factory, so it's cheap on the hot path and easy to test.
  *
- * This is the providers layer, not a generic model-provider router. V1 is
- * BYOK-only: OpenRouter takes precedence over Vercel, and a missing
- * key throws `ProviderUnavailableError`.
+ * This is the providers layer, not a generic model-provider router. Two
+ * provider kinds exist:
+ *
+ *   - `byok` — the hardcoded third-party slots (OpenRouter, Vercel),
+ *     keyed by a stored secret.
+ *   - `endpoint` — ONE generalized OpenAI-compatible endpoint type
+ *     (issue #806): user-configured `{base_url, models[]}` with an
+ *     OPTIONAL key. Ollama is the preset; a missing key is not an error.
+ *
+ * Precedence: BYOK keys first (in metadata order), then configured
+ * endpoints that have at least one registered model. A configured-but-
+ * empty endpoint is not resolvable. Explicit picks skip precedence.
  */
 
 import { TIER_MODEL_IDS, type TierModelId } from "@grida/ai-models";
@@ -18,31 +27,44 @@ import type { ModelTier } from "../tiers";
 import type { SecretsStore } from "../secrets";
 import {
   BYOK_PROVIDER_METADATA,
+  isByokProviderId,
   type ByokProviderId,
 } from "../protocol/provider-ids";
-import { makeOpenRouterFactory, makeVercelFactory } from "./byok";
+import {
+  endpointDefaultModelId,
+  type EndpointProviderConfig,
+} from "../protocol/endpoints";
+import type { EndpointProvidersStore } from "./endpoints";
+import {
+  makeEndpointFactory,
+  makeOpenRouterFactory,
+  makeVercelFactory,
+} from "./byok";
+
+export { EndpointProvidersStore } from "./endpoints";
 
 /** Canonical tier->catalog-model map. One table, sourced from @grida/ai-models. */
 export const MODEL_BY_TIER: Record<ModelTier, TierModelId> = TIER_MODEL_IDS;
 
 export type ResolvedProvider = {
-  provider_id: ByokProviderId;
-  kind: "byok";
+  /** A BYOK provider id or a configured endpoint id. */
+  provider_id: string;
+  kind: "byok" | "endpoint";
   model_factory: ModelFactory;
 };
 
 /**
  * Single error class for both "no provider configured" and "you picked
- * provider X but no key is set" paths. The route maps `providerId` being
- * present to a 4xx with the picked-id surfaced in the body.
+ * provider X but it isn't available" paths. The route maps `provider_id`
+ * being present to a 4xx with the picked-id surfaced in the body.
  */
 export class ProviderUnavailableError extends Error {
   readonly code = "provider_down" as const;
   constructor(public readonly provider_id?: string) {
     super(
       provider_id
-        ? `[agent-host-providers] explicit BYOK provider not available: ${provider_id}`
-        : "[agent-host-providers] no BYOK provider available"
+        ? `[agent-host-providers] explicit provider not available: ${provider_id}`
+        : "[agent-host-providers] no provider available"
     );
     this.name = "ProviderUnavailableError";
   }
@@ -50,14 +72,17 @@ export class ProviderUnavailableError extends Error {
 
 export type ResolveDeps = {
   secrets: SecretsStore;
+  /** Endpoint provider configs. Optional so key-only hosts/tests need not
+   *  wire a store; absent ⇒ no endpoint providers resolve. */
+  endpoints?: EndpointProvidersStore;
 };
 
 export type ResolveOptions = {
   /**
    * Optional caller override. If set, precedence is skipped and only the
-   * named BYOK provider is checked.
+   * named provider (BYOK or endpoint) is checked.
    */
-  explicit?: ByokProviderId;
+  explicit?: string;
 };
 
 export async function resolveProvider(
@@ -71,7 +96,14 @@ export async function resolveProvider(
   for (const provider of BYOK_PROVIDER_METADATA) {
     const key = await deps.secrets._getKey(provider.id);
     if (key) {
-      return makeResolvedProvider(provider.id, key);
+      return makeResolvedByok(provider.id, key);
+    }
+  }
+
+  if (deps.endpoints) {
+    for (const endpoint of await deps.endpoints.list()) {
+      const resolved = await maybeResolveEndpoint(endpoint, deps);
+      if (resolved) return resolved;
     }
   }
 
@@ -79,15 +111,21 @@ export async function resolveProvider(
 }
 
 async function resolveExplicit(
-  providerId: ByokProviderId,
+  providerId: string,
   deps: ResolveDeps
 ): Promise<ResolvedProvider> {
-  const key = await deps.secrets._getKey(providerId);
-  if (!key) throw new ProviderUnavailableError(providerId);
-  return makeResolvedProvider(providerId, key);
+  if (isByokProviderId(providerId)) {
+    const key = await deps.secrets._getKey(providerId);
+    if (!key) throw new ProviderUnavailableError(providerId);
+    return makeResolvedByok(providerId, key);
+  }
+  const endpoint = await deps.endpoints?.get(providerId);
+  const resolved = endpoint && (await maybeResolveEndpoint(endpoint, deps));
+  if (!resolved) throw new ProviderUnavailableError(providerId);
+  return resolved;
 }
 
-function makeResolvedProvider(
+function makeResolvedByok(
   providerId: ByokProviderId,
   key: string
 ): ResolvedProvider {
@@ -108,3 +146,28 @@ function makeResolvedProvider(
   const _exhaustive: never = providerId;
   throw new ProviderUnavailableError(_exhaustive);
 }
+
+/**
+ * Build the resolved provider for one endpoint config, or `null` when
+ * `endpointDefaultModelId` yields nothing runnable for it. The stored
+ * secret under the endpoint's id is optional: a missing, empty or
+ * whitespace-only key is passed on as "no key" (`undefined`).
+ */
+async function maybeResolveEndpoint(
+  endpoint: EndpointProviderConfig,
+  deps: ResolveDeps
+): Promise<ResolvedProvider | null> {
+  const { id, base_url } = endpoint;
+  const default_model_id = endpointDefaultModelId(endpoint);
+  if (!default_model_id) return null;
+  const stored = await deps.secrets._getKey(id);
+  // `||` rather than `??`: a key that trims to "" must also mean "no key".
+  const api_key = stored?.trim() || undefined;
+  const model_factory = makeEndpointFactory({
+    id,
+    base_url,
+    api_key,
+    default_model_id,
+  });
+  return { provider_id: id, kind: "endpoint", model_factory };
+}
diff --git a/packages/grida-ai-agent/src/providers/probe.test.ts b/packages/grida-ai-agent/src/providers/probe.test.ts
new file mode 100644
index 000000000..ed75ac78b
--- /dev/null
+++ b/packages/grida-ai-agent/src/providers/probe.test.ts
@@ -0,0 +1,125 @@
+import { describe, expect, it } from "vitest";
+import { probeEndpointModels, type ProbeFetch } from "./probe";
+
+/** Stub `fetch`: 200 + JSON for a key found in `routes`, 404 otherwise.
+ *  A POST first tries the more specific `"<url> <body.model>"` key. */
+function fakeFetch(routes: Record<string, unknown>): ProbeFetch {
+  const modelOf = (body: string) =>
+    (JSON.parse(body) as { model?: string }).model;
+  return async (url, init) => {
+    const model = init.method === "POST" && init.body && modelOf(init.body);
+    // A model-specific route wins; otherwise fall back to the bare URL.
+    const specific = `${url} ${model}`;
+    const key = model && specific in routes ? specific : url;
+    if (!(key in routes)) return new Response("not found", { status: 404 });
+    return new Response(JSON.stringify(routes[key]), { status: 200 });
+  };
+}
+
+const BASE = "http://localhost:11434/v1";
+
+describe("probeEndpointModels", () => {
+  it("reads Ollama /api/tags with capability mapping", async () => {
+    const result = await probeEndpointModels(
+      BASE,
+      fakeFetch({
+        "http://localhost:11434/api/tags": {
+          models: [
+            { name: "gemma4:31b-mlx", capabilities: ["completion", "tools"] },
+            { name: "tinyllama:1b", capabilities: ["completion"] },
+            { name: "old-model:7b" }, // older Ollama: no capabilities field
+          ],
+        },
+      })
+    );
+    expect(result).toEqual({
+      ok: true,
+      source: "ollama",
+      models: [
+        { id: "gemma4:31b-mlx", tool_call: true },
+        { id: "tinyllama:1b", tool_call: false },
+        { id: "old-model:7b", tool_call: undefined },
+      ],
+    });
+  });
+
+  it("fills the context window — loaded allocation beats the model max", async () => {
+    const result = await probeEndpointModels(
+      BASE,
+      fakeFetch({
+        "http://localhost:11434/api/tags": {
+          models: [
+            { name: "loaded:31b", capabilities: ["tools"] },
+            { name: "cold:7b", capabilities: ["tools"] },
+            { name: "opaque:1b", capabilities: ["tools"] },
+          ],
+        },
+        // `loaded:31b` is running with a capped allocation (32k) — the
+        // /api/ps value must win over the 262k /api/show maximum below.
+        "http://localhost:11434/api/ps": {
+          models: [{ name: "loaded:31b", context_length: 32_768 }],
+        },
+        "http://localhost:11434/api/show loaded:31b": {
+          model_info: { "gemma4.context_length": 262_144 },
+        },
+        "http://localhost:11434/api/show cold:7b": {
+          model_info: { "llama.context_length": 131_072 },
+        },
+        // `opaque:1b` has no /api/show route (fake 404s) → stays unset.
+      })
+    );
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    const byId = new Map(result.models.map((m) => [m.id, m.contextWindow]));
+    expect(byId.get("loaded:31b")).toBe(32_768);
+    expect(byId.get("cold:7b")).toBe(131_072);
+    expect(byId.get("opaque:1b")).toBeUndefined();
+  });
+
+  it("falls back to the OpenAI /models listing (ids only)", async () => {
+    const result = await probeEndpointModels(
+      "http://localhost:4000/v1",
+      fakeFetch({
+        "http://localhost:4000/v1/models": {
+          object: "list",
+          data: [{ id: "gpt-proxy-a" }, { id: "gpt-proxy-b" }],
+        },
+      })
+    );
+    expect(result).toEqual({
+      ok: true,
+      source: "openai",
+      models: [{ id: "gpt-proxy-a" }, { id: "gpt-proxy-b" }],
+    });
+  });
+
+  it("reports unreachable endpoints without throwing", async () => {
+    const result = await probeEndpointModels(BASE, async () => {
+      throw new Error("ECONNREFUSED");
+    });
+    expect(result.ok).toBe(false);
+    if (result.ok) return;
+    expect(result.error).toMatch(/is the server running/);
+  });
+
+  it("rejects non-http(s) and malformed base URLs", async () => {
+    for (const url of ["file:///etc/passwd", "not a url"]) {
+      const result = await probeEndpointModels(url, fakeFetch({}));
+      expect(result.ok).toBe(false);
+    }
+  });
+
+  it("skips malformed rows instead of failing the probe", async () => {
+    const result = await probeEndpointModels(
+      BASE,
+      fakeFetch({
+        "http://localhost:11434/api/tags": {
+          models: [{ name: "good:1b" }, { nope: true }, "junk", { name: "" }],
+        },
+      })
+    );
+    expect(result.ok).toBe(true);
+    if (!result.ok) return;
+    expect(result.models.map((m) => m.id)).toEqual(["good:1b"]);
+  });
+});
diff --git a/packages/grida-ai-agent/src/providers/probe.ts b/packages/grida-ai-agent/src/providers/probe.ts
new file mode 100644
index 000000000..b8a713cc2
--- /dev/null
+++ b/packages/grida-ai-agent/src/providers/probe.ts
@@ -0,0 +1,246 @@
+/**
+ * GRIDA-SEC-004 — endpoint model probe (issue #806).
+ *
+ * Host-side discovery of the models an OpenAI-compatible endpoint
+ * serves, so the user never has to type model ids by hand. Host-side
+ * because the packaged renderer cannot reach the endpoint itself: its
+ * origin is `https://grida.co`, which Ollama's CORS policy rejects —
+ * only the agent host shares the machine with the endpoint.
+ *
+ * Two shapes, tried in order:
+ *
+ *   1. **Ollama native** — `GET <origin>/api/tags` for ids + capability
+ *      tags (`tool_call` comes back real), enriched with the context
+ *      window: `/api/ps` first (a LOADED model's `context_length` is the
+ *      server's actual allocation — authoritative), then `/api/show`
+ *      `model_info` (the model's maximum) for models not loaded.
+ *   2. **Generic OpenAI-compatible** — `GET <base_url>/models`
+ *      (LiteLLM, vLLM, …). Ids only.
+ *
+ * Context-window honesty: a server explicitly capped below a model's
+ * maximum (e.g. `OLLAMA_CONTEXT_LENGTH`) reports the cap via `/api/ps`
+ * only once the model is loaded — the `/api/show` maximum can overshoot
+ * such a setup. The field stays user-editable for exactly that case.
+ *
+ * Threat note (reviewed): the probe makes the host request a user-supplied
+ * URL (GETs, plus `/api/show` POSTs for context windows). This is the SAME
+ * egress the run path already performs against a configured endpoint (and
+ * the writer is the same authenticated loopback client), so it widens
+ * nothing — but the route must never become a generic proxy: responses are
+ * reduced to `{id, tool_call, contextWindow}` rows; raw bodies never reach
+ * the client. Reads are bounded (timeout + size cap); URLs are http(s) only.
+ */
+
+import {
+  parseEndpointBaseUrl,
+  type ProbedEndpointModel,
+} from "../protocol/endpoints";
+
+const PROBE_TIMEOUT_MS = 4_000;
+const MAX_BODY_BYTES = 1_048_576;
+const MAX_MODELS = 64;
+
+export type EndpointProbeResult =
+  | { ok: true; source: "ollama" | "openai"; models: ProbedEndpointModel[] }
+  | { ok: false; error: string };
+
+/** The `fetch` seam — tests inject a fake; production uses the global. */
+export type ProbeFetch = (
+  url: string,
+  init: {
+    signal: AbortSignal;
+    method?: string;
+    headers?: Record<string, string>;
+    body?: string;
+  }
+) => Promise<Response>;
+
+/** Discover the models served at `baseUrl`. Ollama's native listing wins
+ *  when it answers (it carries capabilities); the OpenAI-style `/models`
+ *  list is the fallback; with neither, resolves `{ok: false, error}`. */
+export async function probeEndpointModels(
+  baseUrl: string,
+  fetchImpl: ProbeFetch = fetch
+): Promise<EndpointProbeResult> {
+  const parsed = parseEndpointBaseUrl(baseUrl);
+  if (!parsed.ok) return parsed;
+  const origin = parsed.url.origin;
+  const tagsUrl = `${origin}/api/tags`;
+  const listUrl = `${baseUrl.replace(/\/+$/, "")}/models`;
+
+  // Fire both listings up front — idempotent GETs, and a generic gateway
+  // should not sit behind the full Ollama timeout. The answers are then
+  // consulted in priority order, Ollama first.
+  const tagsPending = requestJson(fetchImpl, tagsUrl);
+  const listPending = requestJson(fetchImpl, listUrl);
+
+  const tags = await tagsPending;
+  const fromOllama = tags.ok ? parseOllamaTags(tags.data) : null;
+  if (fromOllama) {
+    await enrichContextWindows(fetchImpl, origin, fromOllama);
+    return { ok: true, source: "ollama", models: fromOllama };
+  }
+
+  // Generic OpenAI-compatible listing: ids only, no enrichment.
+  const list = await listPending;
+  const fromOpenAi = list.ok ? parseOpenAiModels(list.data) : null;
+  if (fromOpenAi) return { ok: true, source: "openai", models: fromOpenAi };
+
+  const tried = `(tried ${tagsUrl} and ${listUrl})`;
+  return {
+    ok: false,
+    error:
+      "no model listing at this endpoint — is the server running? " + tried,
+  };
+}
+
+type JsonProbe = { ok: true; data: unknown } | { ok: false };
+
+/** Issue one bounded request and decode the reply as JSON — a GET, or a
+ *  JSON POST when `body` is passed. Never rejects: timeout, non-2xx,
+ *  oversize and unparseable replies all resolve to `{ok: false}`. */
+async function requestJson(
+  fetchImpl: ProbeFetch,
+  url: string,
+  body?: unknown
+): Promise<JsonProbe> {
+  try {
+    const init: Parameters<ProbeFetch>[1] = {
+      signal: AbortSignal.timeout(PROBE_TIMEOUT_MS),
+    };
+    if (body !== undefined) {
+      init.method = "POST";
+      init.headers = { "content-type": "application/json" };
+      init.body = JSON.stringify(body);
+    }
+    const res = await fetchImpl(url, init);
+    if (!res.ok) return { ok: false };
+    const text = await readBodyBounded(res);
+    if (text === null) return { ok: false };
+    return { ok: true, data: JSON.parse(text) };
+  } catch {
+    // Network error, abort/timeout, or invalid JSON: all mean "no answer".
+    return { ok: false };
+  }
+}
+
+/**
+ * Read a response body as text, giving up with `null` once it exceeds
+ * {@link MAX_BODY_BYTES}. An oversized declared `content-length` is refused
+ * up front; a streamed body is cancelled as soon as the running byte total
+ * crosses the cap, so it is never buffered whole before being measured.
+ */
+async function readBodyBounded(res: Response): Promise<string | null> {
+  const announced = Number(res.headers.get("content-length"));
+  if (Number.isFinite(announced) && announced > MAX_BODY_BYTES) return null;
+  if (!res.body) {
+    const whole = await res.text();
+    return whole.length <= MAX_BODY_BYTES ? whole : null;
+  }
+  const reader = res.body.getReader();
+  const parts: Uint8Array[] = [];
+  let received = 0;
+  let step = await reader.read();
+  while (!step.done) {
+    received += step.value.byteLength;
+    if (received > MAX_BODY_BYTES) {
+      void reader.cancel().catch(() => {});
+      return null;
+    }
+    parts.push(step.value);
+    step = await reader.read();
+  }
+  const joined = new Uint8Array(received);
+  let cursor = 0;
+  for (const part of parts) {
+    joined.set(part, cursor);
+    cursor += part.byteLength;
+  }
+  return new TextDecoder().decode(joined);
+}
+
+/**
+ * Fill `contextWindow` per model. `/api/ps` first — a loaded model's
+ * `context_length` is what the server actually allocated; `/api/show`'s
+ * `model_info.<arch>.context_length` (the model's maximum) covers the
+ * rest. Every miss — including malformed rows — leaves the field unset
+ * (the registry default applies downstream). Mutates `models` in place.
+ */
+async function enrichContextWindows(
+  fetchImpl: ProbeFetch,
+  origin: string,
+  models: ProbedEndpointModel[]
+): Promise<void> {
+  const loaded = new Map<string, number>();
+  const ps = await requestJson(fetchImpl, `${origin}/api/ps`);
+  if (ps.ok) {
+    const rows = (ps.data as { models?: unknown } | null)?.models;
+    if (Array.isArray(rows)) {
+      for (const row of rows) {
+        // Rows are untrusted JSON — a `null` row is skipped, not dereferenced.
+        const { name, context_length } = (row ?? {}) as Record<string, unknown>;
+        if (typeof name === "string" && isPositiveInt(context_length)) {
+          loaded.set(name, context_length);
+        }
+      }
+    }
+  }
+  await Promise.all(
+    models.map(async (model) => {
+      const allocated = loaded.get(model.id);
+      if (allocated !== undefined) {
+        model.contextWindow = allocated;
+        return;
+      }
+      const show = await requestJson(fetchImpl, `${origin}/api/show`, {
+        model: model.id,
+      });
+      if (!show.ok) return;
+      const info = (show.data as { model_info?: unknown } | null)?.model_info;
+      if (!info || typeof info !== "object") return;
+      for (const [key, value] of Object.entries(info)) {
+        if (key.endsWith(".context_length") && isPositiveInt(value)) {
+          model.contextWindow = value;
+          return;
+        }
+      }
+    })
+  );
+}
+
+function isPositiveInt(value: unknown): value is number {
+  return Number.isInteger(value) && Number(value) > 0;
+}
+
+/**
+ * Reduce an Ollama `GET /api/tags` body (`{models: [{name, capabilities?}]}`)
+ * to probe rows, skipping rows without a usable name. Returns `null` when
+ * the payload carries no `models` array at all.
+ */
+function parseOllamaTags(data: unknown): ProbedEndpointModel[] | null {
+  const listed = (data as { models?: unknown } | null)?.models;
+  if (!Array.isArray(listed)) return null;
+  return listed.slice(0, MAX_MODELS).flatMap((row): ProbedEndpointModel[] => {
+    const id = (row as { name?: unknown } | null)?.name;
+    if (typeof id !== "string" || id === "") return [];
+    const caps = (row as { capabilities?: unknown }).capabilities;
+    // A reported capability list is trusted as-is. Older Ollama sends
+    // none: keep `tool_call` undefined ("unknown") so the permissive
+    // registry default can apply downstream.
+    const tool_call = Array.isArray(caps) ? caps.includes("tools") : undefined;
+    return [{ id, tool_call }];
+  });
+}
+
+/** Reduce an OpenAI-style `GET <base>/models` body (`{data: [{id}]}`) to
+ *  id-only rows; `null` when there is no `data` array. */
+function parseOpenAiModels(data: unknown): ProbedEndpointModel[] | null {
+  const listed = (data as { data?: unknown } | null)?.data;
+  if (!Array.isArray(listed)) return null;
+  // Cap the input first, then drop rows without a non-empty string id.
+  return listed
+    .slice(0, MAX_MODELS)
+    .map((row) => (row as { id?: unknown } | null)?.id)
+    .filter((id): id is string => typeof id === "string" && id !== "")
+    .map((id) => ({ id }));
+}
diff --git a/packages/grida-ai-agent/src/runtime/index.ts b/packages/grida-ai-agent/src/runtime/index.ts
index 8ef05c90e..445985347 100644
--- a/packages/grida-ai-agent/src/runtime/index.ts
+++ b/packages/grida-ai-agent/src/runtime/index.ts
@@ -19,7 +19,6 @@
 import crypto from "node:crypto";
 import { AGENT_SESSION_AGENT } from "../protocol/run";
 import { AGENT_DEFAULT_MODE } from "../protocol/mode";
-import type { ByokProviderId } from "../protocol/provider-ids";
 import {
   resolveProvider,
   ProviderUnavailableError,
@@ -28,15 +27,21 @@ import {
 import { createRecorderConsumer } from "../session/recorder";
 import { titler } from "../session/titler";
 import type { SessionsStore } from "../session/store";
-import type { MessageUsage } from "../session/rows";
+import type { ChatModel, MessageUsage } from "../session/rows";
 import {
   DEFAULT_COMPACTION_CONFIG,
   compactSession,
   resolveModelLimits,
   shouldCompact,
   type CompactionConfig,
+  type ResolveModelLimits,
 } from "../session/compaction";
 import type { compactor } from "../session/compactor";
+import {
+  endpointDefaultModelId,
+  resolveEndpointModels,
+  type EndpointProviderConfig,
+} from "../protocol/endpoints";
 import { discoverSkills } from "../skills/discovery";
 import { discoverProjectInstructions } from "../skills/project-instructions";
 import type { SkillBodyCache, SkillIndex } from "../skills/types";
@@ -80,7 +85,7 @@ type SessionContext = {
 async function resolveOrCreateSession(
   store: SessionsStore,
   req: RunRequest,
-  provider: { provider_id: ByokProviderId }
+  provider: { provider_id: string }
 ): Promise<string | Response> {
   if (req.session_id) {
     const existing = await store.get(req.session_id);
@@ -200,6 +205,13 @@ export type AgentRuntimeDeps = ResolveDeps & {
 /** A provider resolved by {@link resolveProvider} (model factory + ids). */
 type ResolvedProvider = Awaited<ReturnType<typeof resolveProvider>>;
 
+/** One store snapshot powering both compaction limits and the summarizer
+ *  cap — see {@link AgentRuntime.limitsResolver}. */
+type LimitsResolution = {
+  resolve: ResolveModelLimits;
+  configs: readonly EndpointProviderConfig[];
+};
+
 /**
  * Everything {@link AgentRuntime.startTurn} needs to fire ONE turn, decoupled
  * from any HTTP request. The HTTP `run()` path and the core queue drain both
@@ -469,6 +481,69 @@ export class AgentRuntime {
     return ctx;
   }
 
+  /**
+   * Registry-aware model-limits resolution (issue #806): resolves over
+   * catalog ∪ registered endpoint models. On a configured endpoint, a
+   * missing or STALE `model_id` is replaced by the endpoint's default
+   * model — a tier-only Ollama session must NOT fall back to the catalog
+   * tier's frontier-sized window (1M assumed on an 8k model ⇒ compaction
+   * never fires ⇒ context overflow) — and the endpoint's own registered
+   * specs answer first when another endpoint registers the same id.
+   * Returns the loaded configs too, so the summarizer cap reuses them.
+   */
+  private async limitsResolver(): Promise<LimitsResolution> {
+    const endpoints = this.deps.endpoints;
+    if (!endpoints) {
+      return { resolve: (model) => resolveModelLimits(model), configs: [] };
+    }
+    const configs = await endpoints.list();
+    const custom = configs.flatMap(resolveEndpointModels);
+    const resolve: ResolveModelLimits = (model) => {
+      const endpoint = model?.provider_id
+        ? configs.find((e) => e.id === model.provider_id)
+        : undefined;
+      if (!model || !endpoint) return resolveModelLimits(model, custom);
+      // "Known" is scoped to THIS endpoint: an id that is absent, or was
+      // removed from this config since the session was saved, is stale.
+      const defaultId = endpointDefaultModelId(endpoint);
+      const knownOnEndpoint =
+        !!model.model_id &&
+        endpoint.models.some((m) => m.id === model.model_id);
+      const effective =
+        defaultId && !knownOnEndpoint
+          ? { ...model, model_id: defaultId }
+          : model;
+      // Same scoping for the lookup: prefer the endpoint's own specs.
+      const own = resolveEndpointModels(endpoint);
+      const specs = own.some((m) => m.id === effective.model_id) ? own : custom;
+      return resolveModelLimits(effective, specs);
+    };
+    return { resolve, configs };
+  }
+
+  /**
+   * Input cap for the compaction summarizer on this session (issue #806).
+   * The compactor subagent requests the `nano` tier, yet an endpoint
+   * factory serves every tier from the endpoint's default model — so on
+   * a configured endpoint the cap follows that model's window, not the
+   * catalog nano's. `undefined` means "keep the compaction default".
+   */
+  private summarizerInputCap(
+    model: ChatModel | null,
+    limits: LimitsResolution
+  ): number | undefined {
+    if (!model?.provider_id) return undefined;
+    const { provider_id } = model;
+    const isEndpoint = limits.configs.some((cfg) => cfg.id === provider_id);
+    if (!isEndpoint) return undefined;
+    // A ChatModel without `model_id` makes the resolver substitute the
+    // endpoint's DEFAULT model (what `nano` resolves to). Keep 4_096
+    // tokens for the summary output, floor at 1_024, never exceed window.
+    const { context_window } = limits.resolve({ provider_id });
+    const inputBudget = Math.max(1_024, context_window - 4_096);
+    return Math.min(context_window, inputBudget);
+  }
+
   /**
    * Fire auto-compaction when the session is at/over its usable context
    * (RFC `session / compaction`). Blocks the turn on the summarizer — by
@@ -484,8 +559,11 @@ export class AgentRuntime {
     if (!this.compaction_enabled) return;
     const session = await this.deps.sessions_store.get(sessionId);
     if (!session) return;
-    const limits = resolveModelLimits(session.model);
-    if (!shouldCompact(session.total_tokens, limits, this.compaction_config)) {
+    const limits = await this.limitsResolver();
+    const modelLimits = limits.resolve(session.model);
+    if (
+      !shouldCompact(session.total_tokens, modelLimits, this.compaction_config)
+    ) {
       return;
     }
     try {
@@ -494,12 +572,14 @@ export class AgentRuntime {
           store: this.deps.sessions_store,
           model_factory: modelFactory,
           summarize: this.compaction_summarize,
+          resolve_limits: limits.resolve,
         },
         {
           session_id: sessionId,
           auto: true,
           config: this.compaction_config,
           signal,
+          summarizer_input_cap: this.summarizerInputCap(session.model, limits),
         }
       );
     } catch (err) {
@@ -997,13 +1077,20 @@ export class AgentRuntime {
       }
       throw err;
     }
+    const limits = await this.limitsResolver();
     const result = await compactSession(
       {
         store: this.deps.sessions_store,
         model_factory: provider.model_factory,
         summarize: this.compaction_summarize,
+        resolve_limits: limits.resolve,
       },
-      { session_id: sessionId, auto: false, config: this.compaction_config }
+      {
+        session_id: sessionId,
+        auto: false,
+        config: this.compaction_config,
+        summarizer_input_cap: this.summarizerInputCap(session.model, limits),
+      }
     );
     return Response.json(result);
   }
diff --git a/packages/grida-ai-agent/src/runtime/run-input.test.ts b/packages/grida-ai-agent/src/runtime/run-input.test.ts
index e6e5d2469..d9f8f17fc 100644
--- a/packages/grida-ai-agent/src/runtime/run-input.test.ts
+++ b/packages/grida-ai-agent/src/runtime/run-input.test.ts
@@ -315,3 +315,73 @@ describe("parseRunBody", () => {
     expect(parsed.approval_answer).toBeUndefined();
   });
 });
+
+describe("parseRunBody — model/provider gates over the open registry (#806)", () => {
+  const msg = { messages: [{ role: "user", content: "hi" }] };
+  const endpoints = {
+    registeredModels: async () => [{ id: "llama3.1:8b" }],
+    get: async (id: string) =>
+      id === "ollama"
+        ? { id: "ollama", base_url: "http://localhost:11434/v1", models: [] }
+        : null,
+  };
+  const deps = {
+    workspace_registry: { findById: async () => null },
+    endpoints,
+  };
+  const depsWithoutEndpoints = {
+    workspace_registry: { findById: async () => null },
+  };
+
+  it("accepts a catalog model id", async () => {
+    const parsed = await parseRunBody(
+      { ...msg, model_id: "anthropic/claude-opus-4.8" },
+      deps as never
+    );
+    expect(parsed).not.toBeInstanceOf(Response);
+  });
+
+  it("accepts a registered endpoint model id", async () => {
+    const parsed = await parseRunBody(
+      { ...msg, model_id: "llama3.1:8b" },
+      deps as never
+    );
+    expect(parsed).not.toBeInstanceOf(Response);
+    if (parsed instanceof Response) return;
+    expect(parsed.model_id).toBe("llama3.1:8b");
+  });
+
+  it("still 400s an unknown model id (the gate stays closed)", async () => {
+    const parsed = await parseRunBody(
+      { ...msg, model_id: "not-a-model" },
+      deps as never
+    );
+    expect(parsed).toBeInstanceOf(Response);
+    expect(parsed instanceof Response ? parsed.status : 0).toBe(400);
+  });
+
+  it("400s a registered-looking id when no endpoints store is wired", async () => {
+    const parsed = await parseRunBody(
+      { ...msg, model_id: "llama3.1:8b" },
+      depsWithoutEndpoints as never
+    );
+    expect(parsed instanceof Response ? parsed.status : 0).toBe(400);
+  });
+
+  it("accepts a configured endpoint id as provider_id, rejects unknown", async () => {
+    const ok = await parseRunBody(
+      { ...msg, provider_id: "ollama" },
+      deps as never
+    );
+    expect(ok).not.toBeInstanceOf(Response);
+    if (ok instanceof Response) return;
+    expect(ok.explicit).toBe("ollama");
+
+    const bad = await parseRunBody(
+      { ...msg, provider_id: "nope" },
+      deps as never
+    );
+    expect(bad).toBeInstanceOf(Response);
+    expect(bad instanceof Response ? bad.status : 0).toBe(400);
+  });
+});
diff --git a/packages/grida-ai-agent/src/runtime/run-input.ts b/packages/grida-ai-agent/src/runtime/run-input.ts
index 58dadfa4b..0a1bcea40 100644
--- a/packages/grida-ai-agent/src/runtime/run-input.ts
+++ b/packages/grida-ai-agent/src/runtime/run-input.ts
@@ -20,16 +20,15 @@ import {
   type AgentMode,
 } from "../protocol/mode";
 import { AGENT_DEFAULT_TIER, AGENT_TIERS, type ModelTier } from "../tiers";
-import {
-  BYOK_PROVIDER_IDS,
-  type ByokProviderId,
-} from "../protocol/provider-ids";
 import type { SessionsStore } from "../session/store";
 import type { WorkspaceRegistry } from "../workspaces";
+import {
+  isKnownProviderId,
+  type EndpointProvidersStore,
+} from "../providers/endpoints";
 
-const ALLOWED_PROVIDER_IDS = new Set<string>(BYOK_PROVIDER_IDS);
 const ALLOWED_TIERS = new Set<string>(AGENT_TIERS);
-const ALLOWED_MODEL_IDS = new Set<string>(Object.keys(models.text.catalog));
+const CATALOG_MODEL_IDS = new Set<string>(Object.keys(models.text.catalog));
 const ALLOWED_ROLES = new Set<string>(["user", "assistant", "system"]);
 const ALLOWED_SKILL_IDS = new Set<string>(AGENT_SKILL_IDS);
 
@@ -42,9 +41,11 @@ export type NormalizedMessage = {
 export type RunRequest = {
   messages: NormalizedMessage[];
   tier: ModelTier;
-  /** Explicit catalog model id; overrides the tier→model mapping. */
+  /** Explicit model id (catalog or registered); overrides the tier→model
+   *  mapping. */
   model_id?: AgentModelId;
-  explicit?: ByokProviderId;
+  /** Explicit provider pick: BYOK id or configured endpoint id. */
+  explicit?: string;
   feature?: string;
   workspace_id?: string;
   workspace_root?: string;
@@ -58,6 +59,9 @@ export type RunRequest = {
 
 export type ParseRunBodyDeps = {
   workspace_registry: WorkspaceRegistry;
+  /** Endpoint provider configs (issue #806). When present, registered
+   *  model ids and endpoint provider ids join the allowed sets. */
+  endpoints?: EndpointProvidersStore;
 };
 
 export async function parseRunBody(
@@ -92,7 +96,14 @@ export async function parseRunBody(
       : AGENT_DEFAULT_TIER;
   let modelId: AgentModelId | undefined;
   if (b.model_id !== undefined) {
-    if (typeof b.model_id !== "string" || !ALLOWED_MODEL_IDS.has(b.model_id)) {
+    // Allowed model ids = static catalog ∪ user-registered endpoint
+    // models (the open-registry seam, issue #806). Still a closed gate:
+    // an id neither table knows 400s.
+    const allowed =
+      typeof b.model_id === "string" &&
+      (CATALOG_MODEL_IDS.has(b.model_id) ||
+        (await isRegisteredModelId(b.model_id, deps)));
+    if (!allowed) {
       return Response.json(
         { error: `modelId not allowed: ${String(b.model_id)}` },
         { status: 400 }
@@ -100,18 +111,19 @@ export async function parseRunBody(
     }
     modelId = b.model_id as AgentModelId;
   }
-  let explicit: ByokProviderId | undefined;
+  let explicit: string | undefined;
   if (b.provider_id !== undefined) {
-    if (
-      typeof b.provider_id !== "string" ||
-      !ALLOWED_PROVIDER_IDS.has(b.provider_id)
-    ) {
+    const providerId = typeof b.provider_id === "string" ? b.provider_id : "";
+    const allowed =
+      providerId.length > 0 &&
+      (await isKnownProviderId(providerId, deps.endpoints));
+    if (!allowed) {
       return Response.json(
         { error: `providerId not allowed: ${String(b.provider_id)}` },
         { status: 400 }
       );
     }
-    explicit = b.provider_id as ByokProviderId;
+    explicit = providerId;
   }
   let workspaceId: string | undefined;
   let workspaceRoot: string | undefined;
@@ -159,6 +171,15 @@ export async function parseRunBody(
   };
 }
 
+/** True iff the endpoints store (when wired) has registered `modelId`. */
+async function isRegisteredModelId(
+  modelId: string,
+  deps: ParseRunBodyDeps
+): Promise<boolean> {
+  const known = deps.endpoints ? await deps.endpoints.registeredModels() : [];
+  return known.some(({ id }) => id === modelId);
+}
+
 /**
  * The id of the user message a direct `/agent/run` fires — the LAST
  * user-role message of the incoming array (the AI SDK client resends the
diff --git a/packages/grida-ai-agent/src/session/compaction.test.ts b/packages/grida-ai-agent/src/session/compaction.test.ts
index dff63930c..1b3d9d0ba 100644
--- a/packages/grida-ai-agent/src/session/compaction.test.ts
+++ b/packages/grida-ai-agent/src/session/compaction.test.ts
@@ -98,6 +98,30 @@ describe("threshold helpers", () => {
     expect(limits.context_window).toBeGreaterThan(0);
     expect(limits.output_limit).toBeGreaterThan(0);
   });
+
+  it("resolveModelLimits resolves a registered local model's real window (#806)", () => {
+    const custom = [
+      { id: "llama3.1:8b", contextWindow: 8_192, outputLimit: 2_048 },
+    ];
+    const limits = resolveModelLimits(
+      { provider_id: "ollama", tier: "pro", model_id: "llama3.1:8b" },
+      custom
+    );
+    // The pre-registry behavior fell back to the pro tier's frontier
+    // window (1M) for any unknown id — compaction never fired and the
+    // session died on overflow. The registry must surface the real 8k.
+    expect(limits.context_window).toBe(8_192);
+    expect(limits.output_limit).toBe(2_048);
+  });
+
+  it("resolveModelLimits still falls back to tier for unknown ids", () => {
+    const limits = resolveModelLimits({
+      provider_id: "ollama",
+      tier: "nano",
+      model_id: "unknown:0b",
+    });
+    expect(limits.context_window).toBeGreaterThan(100_000);
+  });
 });
 
 describe("splitTail", () => {
diff --git a/packages/grida-ai-agent/src/session/compaction.ts b/packages/grida-ai-agent/src/session/compaction.ts
index 3d8ab0c18..8d65c4690 100644
--- a/packages/grida-ai-agent/src/session/compaction.ts
+++ b/packages/grida-ai-agent/src/session/compaction.ts
@@ -67,12 +67,27 @@ export type ModelLimits = {
   output_limit: number;
 };
 
-/** Resolve a session's model limits from the catalog. Falls back to the
- *  default tier when the model can't be resolved. */
-export function resolveModelLimits(model: ChatModel | null): ModelLimits {
-  let spec = model?.model_id
-    ? models.text.modelSpecById(model.model_id)
-    : undefined;
+/** A model-limits resolver. The default ({@link resolveModelLimits} with
+ *  no custom list) only knows the static catalog; hosts with registered
+ *  endpoint models inject a registry-aware one (see `AgentRuntime`). */
+export type ResolveModelLimits = (model: ChatModel | null) => ModelLimits;
+
+/**
+ * Resolve a session's model limits over catalog ∪ `custom` (the open-
+ * registry seam, issue #806). Falls back to the default tier when the
+ * model can't be resolved — note this fallback assumes a frontier-sized
+ * window, which is why registered local models MUST resolve through
+ * `custom` rather than land here (an 8k local model treated as 1M never
+ * compacts and dies on context overflow).
+ */
+export function resolveModelLimits(
+  model: ChatModel | null,
+  custom?: readonly models.text.registry.CustomModelSpec[]
+): ModelLimits {
+  let spec: { contextWindow: number; outputLimit: number } | undefined =
+    model?.model_id
+      ? models.text.registry.resolve(model.model_id, custom)
+      : undefined;
   if (!spec && model?.tier) spec = models.text.byTier[model.tier];
   if (!spec) spec = models.text.byTier.pro;
   return { context_window: spec.contextWindow, output_limit: spec.outputLimit };
@@ -204,6 +219,10 @@ export type CompactionDeps = {
   model_factory: ModelFactory;
   /** Injected summarizer (defaults to the real `compactor.summarize`). */
   summarize?: compactor.Summarize;
+  /** Injected model-limits resolver (defaults to the catalog-only
+   *  {@link resolveModelLimits}). Hosts with registered endpoint models
+   *  inject a registry-aware one so local-model windows resolve real. */
+  resolve_limits?: ResolveModelLimits;
   /** Warning sink. Defaults to console.warn. */
   on_warn?: (message: string) => void;
 };
@@ -256,7 +275,7 @@ export async function compactSession(
 
   const session = await deps.store.get(opts.session_id);
   if (!session) return { compacted: false, reason: "session-not-found" };
-  const limits = resolveModelLimits(session.model);
+  const limits = (deps.resolve_limits ?? resolveModelLimits)(session.model);
 
   const messages = await deps.store.listVisibleMessages(opts.session_id);
 
diff --git a/packages/grida-ai-agent/src/session/compactor.ts b/packages/grida-ai-agent/src/session/compactor.ts
index f28b61a68..93f475852 100644
--- a/packages/grida-ai-agent/src/session/compactor.ts
+++ b/packages/grida-ai-agent/src/session/compactor.ts
@@ -20,8 +20,12 @@ import type { ModelTier } from "../tiers";
 
 /** Cheapest tier the provider exposes (RFC: `nano` / `small`). */
 const COMPACTOR_TIER: ModelTier = "nano";
-const DEFAULT_MAX_OUTPUT_TOKENS = 1024;
-const DEFAULT_TIMEOUT_MS = 30_000;
+// The cap must cover REASONING + the summary: on a thinking model the
+// output budget includes the think stream, and a tight cap truncates
+// before the Markdown summary lands. Non-thinking models stop at the
+// summary length anyway, so the ceiling is free for them.
+const DEFAULT_MAX_OUTPUT_TOKENS = 2048;
+const DEFAULT_TIMEOUT_MS = 60_000;
 
 const SYSTEM_PROMPT = `You compress a long agent/user conversation into a compact, faithful summary so the conversation can continue with less context.
 
diff --git a/packages/grida-ai-agent/src/session/rows.ts b/packages/grida-ai-agent/src/session/rows.ts
index 4459509e3..4ae8ad815 100644
--- a/packages/grida-ai-agent/src/session/rows.ts
+++ b/packages/grida-ai-agent/src/session/rows.ts
@@ -8,13 +8,13 @@
  * payloads against them.
  */
 
-import type { ByokProviderId } from "../protocol/provider-ids";
+import type { ProviderId } from "../protocol/provider-ids";
 import type { AgentModelId } from "../protocol/run";
 import type { AgentMode } from "../protocol/mode";
 import type { ModelTier } from "../tiers";
 
 export type ChatModel = {
-  provider_id: ByokProviderId;
+  provider_id: ProviderId;
   tier?: ModelTier;
   model_id?: AgentModelId;
 };
diff --git a/packages/grida-ai-agent/src/session/titler.ts b/packages/grida-ai-agent/src/session/titler.ts
index a911eaf1b..4dfdeca12 100644
--- a/packages/grida-ai-agent/src/session/titler.ts
+++ b/packages/grida-ai-agent/src/session/titler.ts
@@ -45,7 +45,13 @@ export namespace titler {
         system: SYSTEM_PROMPT,
         prompt,
         temperature: 0.3,
-        maxOutputTokens: 32,
+        // The cap must cover REASONING + text: on a thinking model
+        // (e.g. a local Ollama reasoning model) `completion_tokens`
+        // includes the think stream, and a tight cap is consumed before
+        // any title text lands (`finish_reason: length`, empty content).
+        // 512 leaves thinking headroom; a non-thinking nano stops at
+        // ~10 tokens anyway, so the ceiling costs nothing.
+        maxOutputTokens: 512,
         abortSignal: opts.signal,
       });
       return sanitize(text);
@@ -60,7 +66,10 @@ export namespace titler {
     model_factory: ModelFactory;
     /** First user message text — caller extracts from the request body. */
     user_text: string;
-    /** Hard timeout for the title gen call. Defaults to 15s. */
+    /** Hard timeout for the title gen call. Defaults to 60s — generous
+     *  because the call is fire-and-forget (a ceiling, not a wait): fast
+     *  hosted nanos finish in ~1s, while a local single-flight server
+     *  (Ollama) may queue the titler behind the main turn. */
     timeout_ms?: number;
   };
 
@@ -71,7 +80,7 @@ export namespace titler {
     if (!before) return null;
     if (!session_title.isDefault(before.title)) return null;
 
-    const signal = AbortSignal.timeout(opts.timeout_ms ?? 15_000);
+    const signal = AbortSignal.timeout(opts.timeout_ms ?? 60_000);
     const title = await generate({
       model_factory: opts.model_factory,
       user_text: opts.user_text,
diff --git a/packages/grida-ai-agent/src/transport.ts b/packages/grida-ai-agent/src/transport.ts
index 82ceb599f..c06ee85c0 100644
--- a/packages/grida-ai-agent/src/transport.ts
+++ b/packages/grida-ai-agent/src/transport.ts
@@ -38,6 +38,10 @@ import type {
   WorkspaceReadFileResult,
   WorkspaceWriteFileResult,
 } from "./protocol/resources";
+import type {
+  EndpointProviderConfig,
+  ProbedEndpointModel,
+} from "./protocol/endpoints";
 
 function base64(value: string): string {
   const g = globalThis as unknown as {
@@ -405,6 +409,34 @@ export namespace AgentTransport {
       },
     } as const;
 
+    readonly providers = {
+      /** Endpoint provider configs (issue #806) — readable plain config,
+       *  unlike secrets. */
+      list_endpoints: async (): Promise<EndpointProviderConfig[]> =>
+        await this.postJson<EndpointProviderConfig[]>(
+          "/providers/endpoints/list"
+        ),
+      set_endpoint: async (config: EndpointProviderConfig): Promise<void> => {
+        await this.postJson<unknown>("/providers/endpoints/set", { config });
+      },
+      delete_endpoint: async (id: string): Promise<void> => {
+        await this.postJson<unknown>("/providers/endpoints/delete", { id });
+      },
+      /** Where the endpoint config JSON lives on disk. */
+      info: async (): Promise<{ path: string }> =>
+        await this.postJson<{ path: string }>("/providers/endpoints/info"),
+      /** Discover the models an endpoint serves (host-side fetch). */
+      probe_endpoint: async (
+        baseUrl: string
+      ): Promise<{
+        source: "ollama" | "openai";
+        models: ProbedEndpointModel[];
+      }> =>
+        await this.postJson("/providers/endpoints/probe", {
+          base_url: baseUrl,
+        }),
+    } as const;
+
     readonly sessions = {
       list: async (filter: SessionListFilter = {}): Promise<SessionListPage> =>
         await this.getJson<SessionListPage>(sessionListPath(filter)),
diff --git a/packages/grida-ai-models/README.md b/packages/grida-ai-models/README.md
index 1ffc1fa52..5fe72ac40 100644
--- a/packages/grida-ai-models/README.md
+++ b/packages/grida-ai-models/README.md
@@ -64,6 +64,8 @@ Each `ModelSpec` contains:
 - `short_label` — optional, manually-curated compact name for space-constrained
   UI (e.g. `"Opus 4.8"`); falls back to `label` when unset
 - `multimodal`
+- `tool_call` — whether the model supports native tool/function calling
+  (explicit on every entry; the agent loop is tool-heavy)
 - `contextWindow`
 - `outputLimit`
 - `cost`
@@ -74,6 +76,20 @@ For UI that needs the compact name, call `models.text.displayLabel(spec)` — it
 returns `short_label` when present and `label` otherwise, so call sites never
 repeat the fallback.
 
+### Open registry (`models.text.registry`)
+
+`models.text.registry` is the seam for **user-registered models** the static
+catalogue does not know — local Ollama models, self-hosted OpenAI-compatible
+gateways. A `CustomModelSpec` needs only an `id`; `normalize` fills
+conservative defaults (8k context, tool-calling assumed) and
+`resolve(id, custom)` looks an id up over catalogue ∪ custom (the catalogue
+wins on collision). `cost` is optional on custom specs by design — a local
+model is first-class without a price card.
+
+```ts
+const spec = models.text.registry.resolve("llama3.1:8b", customSpecs);
+```
+
 ## Media Models
 
 Media model data lives under the `models` namespace:
diff --git a/packages/grida-ai-models/__tests__/registry.test.ts b/packages/grida-ai-models/__tests__/registry.test.ts
new file mode 100644
index 000000000..08bef512d
--- /dev/null
+++ b/packages/grida-ai-models/__tests__/registry.test.ts
@@ -0,0 +1,87 @@
+import { describe, expect, it } from "vitest";
+import models from "../src";
+
+const registry = models.text.registry;
+
+describe("models.text.registry.normalize", () => {
+  it("fills defaults for a bare id", () => {
+    const spec = registry.normalize({ id: "llama3.1:8b" });
+    expect(spec).toEqual({
+      id: "llama3.1:8b",
+      label: "llama3.1:8b",
+      multimodal: false,
+      tool_call: true,
+      contextWindow: registry.CUSTOM_MODEL_DEFAULTS.contextWindow,
+      outputLimit: registry.CUSTOM_MODEL_DEFAULTS.outputLimit,
+      cost: undefined,
+      custom: true,
+    });
+  });
+
+  it("keeps explicit fields, including tool_call: false", () => {
+    const spec = registry.normalize({
+      id: "qwen3:32b",
+      label: "Qwen 3 32B",
+      tool_call: false,
+      contextWindow: 131_072,
+      outputLimit: 8_192,
+      multimodal: true,
+    });
+    expect(spec.label).toBe("Qwen 3 32B");
+    expect(spec.tool_call).toBe(false);
+    expect(spec.contextWindow).toBe(131_072);
+    expect(spec.outputLimit).toBe(8_192);
+    expect(spec.multimodal).toBe(true);
+  });
+
+  it("treats an empty label as absent", () => {
+    expect(registry.normalize({ id: "m", label: "" }).label).toBe("m");
+  });
+});
+
+describe("models.text.registry.resolve", () => {
+  const custom = [
+    { id: "llama3.1:8b" },
+    { id: "anthropic/claude-sonnet-4.6", label: "shadowed" },
+  ];
+
+  it("resolves a catalogue id with custom: false and cost present", () => {
+    const spec = registry.resolve("anthropic/claude-opus-4.8", custom);
+    expect(spec?.custom).toBe(false);
+    expect(spec?.cost).toBeDefined();
+    expect(spec?.tool_call).toBe(true);
+  });
+
+  it("resolves a registered local id with normalized defaults", () => {
+    const spec = registry.resolve("llama3.1:8b", custom);
+    expect(spec?.custom).toBe(true);
+    expect(spec?.cost).toBeUndefined();
+    expect(spec?.contextWindow).toBe(
+      registry.CUSTOM_MODEL_DEFAULTS.contextWindow
+    );
+  });
+
+  it("catalogue wins over a colliding custom entry", () => {
+    const spec = registry.resolve("anthropic/claude-sonnet-4.6", custom);
+    expect(spec?.custom).toBe(false);
+    expect(spec?.label).toBe("Claude Sonnet 4.6");
+  });
+
+  it("returns undefined for an unknown id", () => {
+    expect(registry.resolve("nope:0b", custom)).toBeUndefined();
+    expect(registry.resolve("nope:0b")).toBeUndefined();
+  });
+
+  it("does not fuzzy-match custom ids (exact only)", () => {
+    // Catalogue lookup tolerates bare/date-suffixed ids; custom must not.
+    expect(registry.resolve("llama3.1", custom)).toBeUndefined();
+  });
+});
+
+describe("catalogue tool_call flags", () => {
+  it("every catalogue entry declares tool_call explicitly", () => {
+    for (const spec of Object.values(models.text.catalog)) {
+      expect(typeof spec.tool_call).toBe("boolean");
+    }
+  });
+});
diff --git a/packages/grida-ai-models/src/models.ts b/packages/grida-ai-models/src/models.ts
index 4fadf219a..1e2662b7d 100644
--- a/packages/grida-ai-models/src/models.ts
+++ b/packages/grida-ai-models/src/models.ts
@@ -90,6 +90,12 @@ export namespace models {
       short_label?: string;
       /** Whether the model accepts image/file inputs. */
       multimodal: boolean;
+      /**
+       * Whether the model supports native tool/function calling. Explicit
+       * on every entry — the agent loop is tool-heavy, so this flag gates
+       * "can this model drive the agent at all" decisions downstream.
+       */
+      tool_call: boolean;
       /** Maximum context window in tokens (input + output combined). */
       contextWindow: number;
       /** Maximum output tokens per response. */
@@ -108,6 +114,7 @@ export namespace models {
         id: "openai/gpt-5.4-nano",
         label: "GPT-5.4 Nano",
         multimodal: true,
+        tool_call: true,
         contextWindow: 400_000,
         outputLimit: 128_000,
         cost: { input: 0.2, output: 1.25, cacheRead: 0.02 },
@@ -116,6 +123,7 @@ export namespace models {
         id: "openai/gpt-5.4-mini",
         label: "GPT-5.4 Mini",
         multimodal: true,
+        tool_call: true,
         contextWindow: 400_000,
         outputLimit: 128_000,
         cost: { input: 0.75, output: 4.5, cacheRead: 0.075 },
@@ -124,6 +132,7 @@ export namespace models {
         id: "openai/gpt-5.5",
         label: "GPT-5.5",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_050_000,
         outputLimit: 128_000,
         cost: { input: 5, output: 30, cacheRead: 0.5 },
@@ -132,6 +141,7 @@ export namespace models {
         id: "openai/gpt-5.5-pro",
         label: "GPT-5.5 Pro",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_050_000,
         outputLimit: 128_000,
         cost: { input: 30, output: 180 },
@@ -141,6 +151,7 @@ export namespace models {
         label: "Claude Sonnet 4.6",
         short_label: "Sonnet 4.6",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_000_000,
         outputLimit: 128_000,
         cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 },
@@ -150,6 +161,7 @@ export namespace models {
         label: "Claude Opus 4.8",
         short_label: "Opus 4.8",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_000_000,
         outputLimit: 128_000,
         cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
@@ -159,6 +171,7 @@ export namespace models {
         label: "Claude Opus 4.7",
         short_label: "Opus 4.7",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_000_000,
         outputLimit: 128_000,
         cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 },
@@ -170,6 +183,7 @@ export namespace models {
         id: "google/gemini-3.5-flash",
         label: "Gemini 3.5 Flash",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_048_576,
         outputLimit: 65_536,
         cost: { input: 1.5, output: 9, cacheRead: 0.15 },
@@ -179,6 +193,7 @@ export namespace models {
         label: "Gemini 3.1 Pro Preview",
         short_label: "Gemini 3.1 Pro",
         multimodal: true,
+        tool_call: true,
         contextWindow: 1_048_576,
         outputLimit: 65_536,
         cost: { input: 2, output: 12, cacheRead: 0.2 },
@@ -239,6 +254,103 @@ export namespace models {
     export function displayLabel(spec: ModelSpec): string {
       return spec.short_label ?? spec.label;
     }
+
+    // ── models.text.registry ──────────────────────────────────────────
+    //
+    // The open-registry seam (issue #806): spec resolution over the
+    // static catalogue PLUS caller-supplied user-registered models (local
+    // Ollama models, self-hosted OpenAI-compatible gateways). Pure data —
+    // the caller owns where the custom list comes from (agent-host config,
+    // renderer fetch); this namespace only normalizes and resolves.
+
+    export namespace registry {
+      /**
+       * Describes a text model registered by the user, i.e. one absent
+       * from the static catalogue (for example `llama3.1:8b` on a local
+       * Ollama). Only `id` is required; {@link normalize} fills the rest.
+       *
+       * `cost` stays optional on purpose: local models are free/unmetered,
+       * and a registered model has to be first-class without a price card.
+       */
+      export interface CustomModelSpec {
+        /** Exact model id as the provider names it (e.g. `"llama3.1:8b"`). */
+        id: string;
+        /** Human-readable name; the id is used when omitted or empty. */
+        label?: string;
+        /** True when image/file inputs are accepted. Defaults to `false`. */
+        multimodal?: boolean;
+        /**
+         * True when native tool/function calling is supported. Defaults
+         * to `true` (permissive) — an explicit `false` makes consumers
+         * warn rather than block.
+         */
+        tool_call?: boolean;
+        /** Token context window; see {@link CUSTOM_MODEL_DEFAULTS}. */
+        contextWindow?: number;
+        /** Output-token cap per response; see {@link CUSTOM_MODEL_DEFAULTS}. */
+        outputLimit?: number;
+        /** USD per 1M tokens. Left out for local/unmetered models. */
+        cost?: ModelCostPerMillion;
+      }
+
+      /**
+       * Result of an open-registry lookup: a catalogue {@link ModelSpec}
+       * (cost present, `custom: false`) or a normalized
+       * {@link CustomModelSpec} (cost possibly absent, `custom: true`).
+       */
+      export interface ResolvedModelSpec extends Omit<ModelSpec, "cost"> {
+        cost?: ModelCostPerMillion;
+        /** Set when the spec originates from the caller's custom list. */
+        custom: boolean;
+      }
+
+      /**
+       * Fallbacks {@link normalize} applies to a {@link CustomModelSpec}.
+       *
+       * The context window errs low on purpose: exceeding a local model's
+       * real window kills the session mid-run, whereas underestimating it
+       * merely compacts early. 8k mirrors the common Ollama serving
+       * default; users with larger windows raise it in the model's config.
+       */
+      export const CUSTOM_MODEL_DEFAULTS = {
+        multimodal: false,
+        tool_call: true,
+        contextWindow: 8_192,
+        outputLimit: 4_096,
+      } as const;
+
+      /** Complete a custom spec by applying {@link CUSTOM_MODEL_DEFAULTS}. */
+      export function normalize(spec: CustomModelSpec): ResolvedModelSpec {
+        const { id, label } = spec;
+        const defaults = CUSTOM_MODEL_DEFAULTS;
+        return {
+          id,
+          label: label && label.length > 0 ? label : id,
+          multimodal: spec.multimodal ?? defaults.multimodal,
+          tool_call: spec.tool_call ?? defaults.tool_call,
+          contextWindow: spec.contextWindow ?? defaults.contextWindow,
+          outputLimit: spec.outputLimit ?? defaults.outputLimit,
+          cost: spec.cost,
+          custom: true,
+        };
+      }
+
+      /**
+       * Look up a model id in catalogue ∪ custom. The catalogue wins a
+       * collision (curated labels, real pricing); custom ids match exactly,
+       * since local ids like `llama3.1:8b` have no namespace to fuzzy-match.
+       */
+      export function resolve(
+        modelId: string,
+        custom?: readonly CustomModelSpec[]
+      ): ResolvedModelSpec | undefined {
+        const catalogued = modelSpecById(modelId);
+        if (catalogued) return { ...catalogued, custom: false };
+        const registered = custom?.find((entry) => entry.id === modelId);
+        if (!registered) return undefined;
+        return normalize(registered);
+      }
+    }
   }
 
   // ── models.image ──────────────────────────────────────────────────
diff --git a/packages/grida-desktop-bridge/src/index.ts b/packages/grida-desktop-bridge/src/index.ts
index aeae7ed10..d9c142965 100644
--- a/packages/grida-desktop-bridge/src/index.ts
+++ b/packages/grida-desktop-bridge/src/index.ts
@@ -11,7 +11,9 @@ import type {
   AgentRunOptions,
   AgentServerHandshakeResponse,
   AgentUIMessageChunk,
-  ByokProviderId,
+  ProviderId,
+  EndpointProviderConfig,
+  ProbedEndpointModel,
   ChatMessageWithParts,
   ChatSessionRow,
   CreateSessionOptions,
@@ -224,9 +226,29 @@ export type DesktopBridge = {
     }) => Promise<void>;
   };
   secrets: {
-    has: (providerId: ByokProviderId) => Promise<boolean>;
-    set: (providerId: ByokProviderId, key: string) => Promise<void>;
-    delete: (providerId: ByokProviderId) => Promise<void>;
+    has: (providerId: ProviderId) => Promise<boolean>;
+    set: (providerId: ProviderId, key: string) => Promise<void>;
+    delete: (providerId: ProviderId) => Promise<void>;
+  };
+  /**
+   * Endpoint provider config (issue #806) — user-configured OpenAI-
+   * compatible endpoints (Ollama preset, self-hosted gateways). Plain
+   * readable config, unlike `secrets`: list returns full configs.
+   * Optional — older desktop binaries don't carry it; renderers must
+   * feature-detect and hide the surface when absent.
+   */
+  providers?: {
+    list_endpoints: () => Promise<EndpointProviderConfig[]>;
+    set_endpoint: (config: EndpointProviderConfig) => Promise<void>;
+    delete_endpoint: (id: string) => Promise<void>;
+    /** Where the endpoint config JSON lives (the hand-editable file). */
+    info: () => Promise<{ path: string }>;
+    /** Discover the models an endpoint serves (agent-host-side fetch —
+     *  the renderer's origin can't reach a local Ollama directly). */
+    probe_endpoint: (baseUrl: string) => Promise<{
+      source: "ollama" | "openai";
+      models: ProbedEndpointModel[];
+    }>;
   };
   agent: {
     run: (