diff --git a/SECURITY.md b/SECURITY.md index 307548b93..bd74691aa 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -402,6 +402,34 @@ http://localhost:*`. The nonce is generated in the proxy, exposed the BYOK provider; key material never returns to renderer. Closes the exfil path even if all four layers above were bypassed. +**Endpoint providers (local LLMs, #806).** The agent host additionally +serves `/providers/endpoints/*` — CRUD over user-configured +OpenAI-compatible endpoints (Ollama preset, self-hosted gateways), +persisted at `${userData}/endpoints.json`. The split that keeps layer 5 +intact: an endpoint **config** (base URL + registered model list) is +plain readable config the renderer may list back, while an endpoint's +optional **API key** rides the `/secrets/*` surface under the endpoint's +id (the secrets-route allowlist admits configured endpoint ids) and is +never readable. The config validator +(`packages/grida-ai-agent/src/protocol/endpoints.ts`) pins the shape — +http(s) URL, bounded sizes, unknown fields dropped — so a config write +cannot smuggle credentials or blobs into the readable store. The +`base_url` is user-owned egress by design (the desktop user points their +own agent at their own endpoint — same trust model as BYOK), and the +routes sit behind the same CORS/Referer/Basic-Auth stack as everything +else. The `/providers/endpoints/probe` route makes the host GET a +user-supplied URL's model listing (the renderer's grida.co origin cannot +reach a local Ollama itself) — the same egress a configured run already +performs; responses are parsed and reduced to +`{id, tool_call, contextWindow}` rows with bounded reads (timeout + size +cap), never proxied raw. 
On sandboxed +platforms the srt network policy additionally bounds all of this +structurally: outbound to **localhost** is permitted via the +`allowLocalBinding` local-ip rule (how the user's own `ollama serve` is +reached), while a config pointing at an arbitrary **remote** host is +blocked unless that host is in the enumerated `allowed_domains` — a +hostile config cannot turn the sidecar into an open exfil channel. + **Electron-side hardening (mandatory; see the [Electron security checklist](https://www.electronjs.org/docs/latest/tutorial/security)).** `contextIsolation: true`, `nodeIntegration: false`, `sandbox: true`, diff --git a/desktop/src/preload.ts b/desktop/src/preload.ts index 45f99b487..cc2dab164 100644 --- a/desktop/src/preload.ts +++ b/desktop/src/preload.ts @@ -445,6 +445,18 @@ const bridge: DesktopBridge = { }, }, + providers: { + list_endpoints: () => agentClient.providers.list_endpoints(), + set_endpoint: async (config) => { + await agentClient.providers.set_endpoint(config); + }, + delete_endpoint: async (id) => { + await agentClient.providers.delete_endpoint(id); + }, + info: () => agentClient.providers.info(), + probe_endpoint: (baseUrl) => agentClient.providers.probe_endpoint(baseUrl), + }, + agent: { run: (opts, onChunk) => // Fresh runs always return a stream (only `reconnect` may return diff --git a/docs/editor/desktop/_category_.json b/docs/editor/desktop/_category_.json new file mode 100644 index 000000000..48c425ee7 --- /dev/null +++ b/docs/editor/desktop/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Desktop", + "link": { + "type": "generated-index", + "title": "Grida Desktop", + "description": "Guides for the Grida Desktop app." 
+ } +} diff --git a/docs/editor/desktop/img/local-models-configured.webp b/docs/editor/desktop/img/local-models-configured.webp new file mode 100644 index 000000000..141f2e3fb Binary files /dev/null and b/docs/editor/desktop/img/local-models-configured.webp differ diff --git a/docs/editor/desktop/local-models.md b/docs/editor/desktop/local-models.md new file mode 100644 index 000000000..a8650e1c3 --- /dev/null +++ b/docs/editor/desktop/local-models.md @@ -0,0 +1,127 @@ +--- +title: Local Models (Ollama) +description: Run the Grida Desktop agent on AI models that live on your own machine — no account, no API key. +keywords: + - ollama + - local llm + - local ai + - byok + - grida desktop + - ai agent +format: md +doc_tasks: + - update +--- + +# Local Models (Ollama) + +Grida Desktop's AI agent can run on models that live entirely on your own +machine, served by [Ollama](https://ollama.com). There is no account to +create and no API key to paste — your prompts, files, and the model's +responses never leave your computer. + +You can use local models alongside provider keys (OpenRouter, Vercel), or +as your only setup. + +## Requirements + +- **Grida Desktop** installed. +- **Ollama** installed and running (`ollama serve` — the desktop Ollama app + runs it for you). +- At least one model pulled, for example: + + ```sh + ollama pull gpt-oss:20b + ``` + +A note on expectations: local models vary widely in how well they drive +the agent. The agent leans on tool calling (reading and writing files, +running commands, planning), and small models often handle this poorly. +Models in the ~30B class and up are recommended for agent tasks. + +## Set up Ollama + +Open **Settings** from the app menu, find the **Local Models** card, and +click **Set up Ollama**. The base URL is prefilled with Ollama's local +address (`http://localhost:11434/v1`), and the models you have pulled are +detected automatically. 
+ +![The Local Models card after setup, with an auto-detected model and its context window and tool-support badges](./img/local-models-configured.webp) + +Review the list and click **Save**: + +- Each detected model shows its **context window** and **tool-calling** + support as read-only badges. These come from the endpoint itself and + refresh whenever you open Settings (and on **Detect**, useful after you + `ollama pull` a new model). For a model that is currently loaded, the + context window is the size your server actually allocated; otherwise it + is the model's maximum. +- A model you add manually by id (for example on a gateway that doesn't + report capabilities) keeps editable fields instead — there, you are + the data source. Manually added models default to a conservative + `8192` context. + +The first model in the list is the default — background work like session +titles and summaries also runs on it. + +## Use a local model + +Registered models appear in the model picker in every agent composer, +grouped under the endpoint name (for example `gpt-oss:20b · Ollama`). +Pick one and chat as usual. Everything the agent does — reading your +workspace files, making edits, planning — runs against the local model. +Each session remembers the model it ran with. + +If you have no provider key configured at all, the agent uses your Ollama +setup automatically. + +## Models without tool support + +The agent works through tool calls, so a model that cannot make them +loses most of its abilities. Tool support is detected per model — Ollama +reports it, and `ollama show <model>` lists `tools` when a model supports +tool calling. When you select a model without tool support, the composer +shows a warning, but you can still chat with it. + +## Troubleshooting + +- **The model errors immediately.** Check that Ollama is running: open + `http://localhost:11434` in a browser — it should answer + `Ollama is running`. 
+- **A model is missing from the picker.** Only registered models appear. + Click **Detect** in **Settings → Local Models** after pulling a new + model, or add its id manually. +- **Long sessions stop or degrade.** The detected context window may be + larger than what your serving configuration actually allows (it + converges to the served size once the model has been loaded). To pin a + smaller value, set an override in the config file — see below. +- **Slow responses.** Local speed is your hardware's speed. Smaller + models respond faster but handle agent tasks worse. + +## Other OpenAI-compatible endpoints + +The base URL accepts any OpenAI-compatible server on your machine, so a +local gateway such as LiteLLM or vLLM works the same way: point the base +URL at it and register the models it serves. If the gateway needs an API +key, save it in the card's **API key** field (it appears once the +endpoint is saved) — the key is stored by the agent host and never shown +back. Ollama itself needs no key. + +## Advanced: the config file + +Everything on this page is stored as plain JSON in `endpoints.json` (the +settings card links to it). Detected values refresh automatically, so +hand-edits to them won't stick — if an endpoint reports a value that is +wrong for your setup (for example, your server caps context below the +model's maximum), pin the correction in the model's `overrides` instead. 
+Overrides always win over detected values, and detection never touches +them: + +```json +{ + "id": "gemma4:31b-mlx", + "tool_call": true, + "contextWindow": 262144, + "overrides": { "contextWindow": 32768 } +} +``` diff --git a/editor/app/desktop/settings/page.tsx b/editor/app/desktop/settings/page.tsx index 2e40b08a7..7e24b3f03 100644 --- a/editor/app/desktop/settings/page.tsx +++ b/editor/app/desktop/settings/page.tsx @@ -1,10 +1,11 @@ "use client"; -import { useCallback, useEffect, useState } from "react"; -import { Loader2 } from "lucide-react"; +import { useCallback, useEffect, useRef, useState } from "react"; +import { Loader2, Trash2 } from "lucide-react"; import { Button } from "@app/ui/components/button"; import { Input } from "@app/ui/components/input"; import { Label } from "@app/ui/components/label"; +import { Switch } from "@app/ui/components/switch"; import { Card, CardContent, @@ -16,9 +17,15 @@ import { Skeleton } from "@app/ui/components/skeleton"; import { BYOK_PROVIDER_LABELS, DesktopBridgeMissingError, + OLLAMA_ENDPOINT_PRESET, app, + mergeProbedModels, + providers, + resolveEndpointModel, secrets, type ByokProviderId, + type EndpointModelEntry, + type EndpointProviderConfig, } from "@/lib/desktop/bridge"; import { DesktopPageContent, @@ -47,11 +54,12 @@ export default function DesktopSettingsPage() {

Settings

- AI provider keys and app info. + AI provider keys, local models, and app info.

+ @@ -277,6 +285,615 @@ function StatusPill({ kind }: { kind: "loading" | "empty" | "configured" }) { ); } +/* ─────────────────────────── Local models ───────────────────────── */ + +/** + * Endpoint provider config (issue #806) — the Ollama preset slot. The + * agent host persists configs in `endpoints.json` (plain config, not a + * secret; the bridge may read them back, unlike keys). + * + * The section edits a local draft and persists on Save — endpoint config + * is structural (base URL + model list), so field-level autosave would + * fire half-formed configs at the host validator. + */ + +type LocalState = + | { kind: "loading" } + | { kind: "unsupported" } + | { kind: "ready"; draft: EndpointProviderConfig | null; dirty: boolean } + | { kind: "saving"; draft: EndpointProviderConfig | null } + | { kind: "error"; message: string; draft: EndpointProviderConfig | null }; + +function LocalModelsSection() { + const [state, setState] = useState({ kind: "loading" }); + const [newModelId, setNewModelId] = useState(""); + const [probing, setProbing] = useState(false); + const [probeNote, setProbeNote] = useState(null); + // Whether the endpoint config exists on the host — the API-key slot is + // only rendered then (the secrets allowlist accepts CONFIGURED endpoint + // ids; a key for an unsaved draft would 400). + const [persisted, setPersisted] = useState(false); + // Stale-write guard: detection runs async off a SNAPSHOT of the config + // while the form stays editable. Any user action that changes what the + // draft means (edit, save, remove, re-setup) bumps this; a completion + // holding an older number drops its write instead of resurrecting a + // deleted endpoint or wiping newer unsaved edits. + const opVersion = useRef(0); + + /** + * Discover the endpoint's models (agent-host-side fetch of Ollama's + * `/api/tags` + `/api/ps`/`/api/show`, or a generic `/models`) and + * refresh the DETECTED fields. 
Detection owns the top-level + * `tool_call`/`contextWindow` on each entry — the probe overwrites + * them freely; human corrections live in `overrides` (hand-edited + * JSON, or the inputs shown when detection has nothing) and are never + * touched here. + * + * `persist: true` (an already-saved config) writes the refreshed + * config straight back — detected facts aren't a user choice, so they + * don't sit in an unsaved draft. The setup flow passes `false` and + * keeps the explicit Save. + */ + const detectInto = useCallback( + async (base: EndpointProviderConfig, opts: { persist: boolean }) => { + const version = opVersion.current; + setProbing(true); + setProbeNote(null); + try { + const result = await providers.probeEndpoint(base.base_url); + // `base` is stale once the user edited/saved/removed mid-probe — + // applying it would undo their action. Drop the result silently. + if (opVersion.current !== version) return; + const merged = mergeProbedModels(base.models, result.models); + setProbeNote( + merged.discovered > 0 + ? `Found ${merged.discovered} model${merged.discovered === 1 ? "" : "s"}.` + : merged.updated > 0 + ? "Updated model details." + : "No new models found." 
+ ); + if (merged.discovered === 0 && merged.updated === 0) return; + const next = { ...base, models: merged.models }; + if (opts.persist) { + await providers.setEndpoint(next); + if (opVersion.current !== version) return; + setState({ kind: "ready", draft: next, dirty: false }); + } else { + setState({ kind: "ready", draft: next, dirty: true }); + } + } catch (err) { + if (opVersion.current !== version) return; + setProbeNote( + `Couldn't reach the endpoint (${describeError(err)}) — add models manually.` + ); + } finally { + setProbing(false); + } + }, + [] + ); + + const refresh = useCallback(async () => { + if (!providers.isSupported()) { + setState({ kind: "unsupported" }); + return; + } + const version = ++opVersion.current; + try { + const list = await providers.listEndpoints(); + const ollama = list.find((e) => e.id === OLLAMA_ENDPOINT_PRESET.id); + if (opVersion.current !== version) return; + setState({ kind: "ready", draft: ollama ?? null, dirty: false }); + setPersisted(ollama != null); + // Detected values converge to the server's truth on every visit — + // notably /api/ps starts reporting a model's REAL allocation once + // it has been loaded. Fire-and-forget; failures only leave a note. + if (ollama) void detectInto(ollama, { persist: true }); + } catch (err) { + if (opVersion.current !== version) return; + setState({ kind: "error", message: describeError(err), draft: null }); + } + }, [detectInto]); + + useEffect(() => { + void refresh(); + }, [refresh]); + + const draft = "draft" in state ? 
state.draft : null; + + const edit = useCallback((next: EndpointProviderConfig) => { + opVersion.current += 1; + setState({ kind: "ready", draft: next, dirty: true }); + }, []); + + const handleSave = useCallback(async () => { + if (!draft) return; + const version = ++opVersion.current; + setState({ kind: "saving", draft }); + try { + await providers.setEndpoint(draft); + const list = await providers.listEndpoints(); + const saved = list.find((e) => e.id === OLLAMA_ENDPOINT_PRESET.id); + setPersisted(saved != null); + // An edit made while the save was in flight wins over the read-back. + if (opVersion.current !== version) return; + setState({ kind: "ready", draft: saved ?? null, dirty: false }); + } catch (err) { + if (opVersion.current !== version) return; + setState({ kind: "error", message: describeError(err), draft }); + } + }, [draft]); + + const handleEnable = useCallback(() => { + opVersion.current += 1; + const base: EndpointProviderConfig = { + ...OLLAMA_ENDPOINT_PRESET, + models: [], + }; + setState({ kind: "ready", draft: base, dirty: true }); + // Prefill from the running Ollama right away — the common path is + // "models already pulled; nothing to type". Not persisted until the + // user confirms with Save (the config doesn't exist yet). + void detectInto(base, { persist: false }); + }, [detectInto]); + + const handleRemove = useCallback(async () => { + if (!draft) return; + // Bump FIRST: an in-flight detection completing after this click must + // not persist its snapshot back and resurrect the deleted endpoint. + opVersion.current += 1; + let confirmed = false; + try { + confirmed = await providers.confirmDeleteEndpoint( + draft.label ?? 
draft.id + ); + } catch (err) { + setState({ kind: "error", message: describeError(err), draft }); + return; + } + if (!confirmed) return; + setState({ kind: "saving", draft }); + try { + await providers.deleteEndpoint(draft.id); + await refresh(); + } catch (err) { + setState({ kind: "error", message: describeError(err), draft }); + } + }, [draft, refresh]); + + const addModel = useCallback(() => { + if (!draft) return; + const id = newModelId.trim(); + if (!id || draft.models.some((m) => m.id === id)) return; + edit({ ...draft, models: [...draft.models, { id }] }); + setNewModelId(""); + }, [draft, newModelId, edit]); + + const saveDisabled = + state.kind !== "ready" || + !state.dirty || + !draft || + draft.base_url.trim().length === 0; + + // Old desktop binaries have no bridge surface for this — hide rather + // than render a dead section. + if (state.kind === "unsupported") return null; + + return ( + + + Local Models + + Run the agent on your own machine with{" "} + + Ollama + {" "} + — no account, no API key. Start ollama serve and pull a + model; Grida detects it automatically. Local models vary widely in + agent ability; larger models (~30B+) are recommended for agent tasks. + + + + {state.kind === "loading" ? ( + + ) : !draft ? ( +
+ +
+ ) : ( + <> +
+ + edit({ ...draft, base_url: e.target.value })} + placeholder={OLLAMA_ENDPOINT_PRESET.base_url} + autoComplete="off" + spellCheck={false} + /> +
+ +
+
+ + +
+ {probeNote && ( +

+ {probeNote} +

+ )} + {draft.models.length === 0 && !probing && ( +

+ Models you pulled in Ollama are detected automatically — or + add one by id (e.g. llama3.1:8b). The first model + is the default. +

+ )} + {draft.models.map((model, index) => ( + + edit({ + ...draft, + models: draft.models.map((m, i) => + i === index ? next : m + ), + }) + } + onRemove={() => + edit({ + ...draft, + models: draft.models.filter((_, i) => i !== index), + default_model_id: + draft.default_model_id === model.id + ? undefined + : draft.default_model_id, + }) + } + /> + ))} +
+ setNewModelId(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + e.preventDefault(); + addModel(); + } + }} + placeholder="model id, e.g. llama3.1:8b" + autoComplete="off" + spellCheck={false} + /> + +
+
+ + {persisted && ( + + )} + +
+ + +
+ + )} + + {state.kind === "error" && ( + + )} + + {draft && providers.canRevealConfigFile() && ( +

+ Stored as plain JSON — detected values refresh automatically; to pin + a value the endpoint reports wrong, set overrides in{" "} + + . +

+ )} +
+
+ ); +} + +type EndpointKeyState = + | { kind: "loading" | "empty" | "configured" | "saving" | "removing" } + | { kind: "error"; message: string }; + +/** + * Optional API key for a configured endpoint (issue #806). Ollama needs + * none; a keyed self-hosted gateway stores its key HERE — through the + * same write/presence/delete-only `secrets` surface as BYOK keys, under + * the ENDPOINT's id (GRIDA-SEC-004: never inside the endpoint config, + * never readable back). Rendered only for a persisted endpoint, since + * the secrets allowlist accepts configured endpoint ids only. + */ +function EndpointKeyRow({ + endpointId, + label, +}: { + endpointId: string; + label: string; +}) { + const [state, setState] = useState({ kind: "loading" }); + const [value, setValue] = useState(""); + + const refresh = useCallback(async () => { + try { + setState({ + kind: (await secrets.hasKey(endpointId)) ? "configured" : "empty", + }); + } catch (err) { + setState({ kind: "error", message: describeError(err) }); + } + }, [endpointId]); + + useEffect(() => { + void refresh(); + }, [refresh]); + + const handleSaveKey = useCallback(async () => { + setState({ kind: "saving" }); + try { + await secrets.setKey(endpointId, value); + setValue(""); + await refresh(); + } catch (err) { + setValue(""); + setState({ kind: "error", message: describeError(err) }); + } + }, [endpointId, value, refresh]); + + const handleRemoveKey = useCallback(async () => { + let confirmed = false; + try { + confirmed = await secrets.confirmDeleteKey(endpointId, label); + } catch (err) { + setState({ kind: "error", message: describeError(err) }); + return; + } + if (!confirmed) return; + setState({ kind: "removing" }); + try { + await secrets.deleteKey(endpointId); + await refresh(); + } catch (err) { + setState({ kind: "error", message: describeError(err) }); + } + }, [endpointId, label, refresh]); + + return ( +
+
+ + {(state.kind === "configured" || state.kind === "removing") && ( + + )} +
+ + {state.kind === "loading" ? ( + + ) : state.kind === "error" ? ( + + ) : state.kind === "configured" || state.kind === "removing" ? ( +

+ A key is configured for this endpoint — stored by the agent host, + never shown back. +

+ ) : ( + <> +
+ setValue(e.target.value)} + disabled={state.kind === "saving"} + autoComplete="off" + spellCheck={false} + /> + +
+

+ For gateways that require authentication (a keyed LiteLLM or vLLM). + Sent as a bearer token on requests to this endpoint. +

+ + )} +
+ ); +} + +const compactTokens = new Intl.NumberFormat("en-US", { notation: "compact" }); + +/** + * One registered model. Detection owns the capability fields: a value + * the endpoint reported renders as a read-only badge (no input over + * discoverable truth — a hand-typed snapshot only rots). Inputs appear + * ONLY where detection has nothing (manual adds, ids-only gateways); + * they write to `overrides`, the sticky human slot a probe refresh + * never touches. + */ +function LocalModelRow({ + model, + onChange, + onRemove, +}: { + model: EndpointModelEntry; + onChange: (next: EndpointModelEntry) => void; + onRemove: () => void; +}) { + const resolved = resolveEndpointModel(model); + const ctxOverridden = model.overrides?.contextWindow !== undefined; + const toolsOverridden = model.overrides?.tool_call !== undefined; + + return ( +
+ {model.id} + + {model.contextWindow !== undefined ? ( + + {/* non-null: this branch is gated on a detected contextWindow, + and resolution only ever overrides it, never unsets it */} + {compactTokens.format(resolved.contextWindow!)} ctx + {ctxOverridden ? " ·m" : ""} + + ) : ( + { + const value = e.target.valueAsNumber; + onChange({ + ...model, + overrides: { + ...model.overrides, + contextWindow: Number.isFinite(value) + ? Math.max(1, Math.floor(value)) + : undefined, + }, + }); + }} + placeholder="ctx (8192)" + aria-label="Context window (tokens)" + /> + )} + + {model.tool_call !== undefined ? ( + + {resolved.tool_call ? "tools" : "no tools"} + + ) : ( + + )} + + +
+ ); +} + /* ────────────────────────────── About ────────────────────────────── */ function AboutSection() { diff --git a/editor/app/desktop/welcome/page.tsx b/editor/app/desktop/welcome/page.tsx index f3f812599..6dcfcaddf 100644 --- a/editor/app/desktop/welcome/page.tsx +++ b/editor/app/desktop/welcome/page.tsx @@ -57,6 +57,7 @@ import { DesktopModelPicker, useModelPickerState, } from "@/scaffolds/desktop/shared/model-picker"; +import { useEndpointProviders } from "@/scaffolds/desktop/shared/registered-models"; import { useWorkspaceComposerCatalog } from "@/scaffolds/desktop/shared/use-workspace-composer-catalog"; import { workspaceWorkbenchHref } from "@/scaffolds/desktop/workbench/workspace-workbench-url"; @@ -110,6 +111,10 @@ export default function DesktopWelcomePage() { // empty id and yields an empty catalog. const catalog = useWorkspaceComposerCatalog(selectedId ?? ""); + // Configured endpoint providers (issue #806): registered local models + // join the welcome composer's picker too. + const endpoints = useEndpointProviders(); + // Model selection for the composer. 
No sessions here (the welcome page // never loads a chat), so this just holds the user's pick at the // default; it rides the handoff so the workspace chat's first turn runs @@ -117,6 +122,7 @@ export default function DesktopWelcomePage() { const { model_id: modelId, setModelId } = useModelPickerState({ current_id: null, sessions: [], + endpoints, }); const onOpen = useCallback(async () => { @@ -271,6 +277,7 @@ export default function DesktopWelcomePage() { } /> diff --git a/editor/lib/agent-chat/approval-resume.test.ts b/editor/lib/agent-chat/approval-resume.test.ts index 9b4e78959..7a9634aec 100644 --- a/editor/lib/agent-chat/approval-resume.test.ts +++ b/editor/lib/agent-chat/approval-resume.test.ts @@ -26,6 +26,21 @@ describe("buildApprovalResumeBody", () => { }); }); + it("pins the endpoint provider on resume — same rule as a normal send (#806)", () => { + // A resume re-enters /agent/run: without the pin, a registered local + // model id would cascade BYOK-first onto a provider that can't serve it. 
+ const body = buildApprovalResumeBody({ + session_id: "ses_1", + model_id: "llama3.1:8b", + provider_id: "ollama", + mode: "accept-edits", + tool_call_id: "tc1", + approval_id: "ap1", + approved: true, + }); + expect(body.provider_id).toBe("ollama"); + }); + it("forwards a denial (approved: false) verbatim", () => { const body = buildApprovalResumeBody({ mode: "accept-edits", diff --git a/editor/lib/agent-chat/approval-resume.ts b/editor/lib/agent-chat/approval-resume.ts index 65404c1ce..e3c7209b1 100644 --- a/editor/lib/agent-chat/approval-resume.ts +++ b/editor/lib/agent-chat/approval-resume.ts @@ -18,6 +18,13 @@ import type { AgentMode } from "@/lib/desktop/bridge"; export type ApprovalResumeBody = { session_id?: string; model_id?: string; + /** + * Endpoint provider pin (issue #806) — same rule as a normal send: a + * resume re-enters `/agent/run`, so without the pin a registered local + * model id would cascade BYOK-first and land on a provider that cannot + * serve it. Omitted for catalog models. 
+ */ + provider_id?: string; mode: AgentMode; approval_answer: { tool_call_id: string; @@ -29,6 +36,7 @@ export type ApprovalResumeBody = { export type ApprovalResumeArgs = { session_id?: string; model_id?: string; + provider_id?: string; mode: AgentMode; tool_call_id: string; approval_id: string; @@ -41,6 +49,7 @@ export function buildApprovalResumeBody( return { session_id: args.session_id, model_id: args.model_id, + provider_id: args.provider_id, mode: args.mode, approval_answer: { tool_call_id: args.tool_call_id, diff --git a/editor/lib/agent-chat/build-agent-send.test.ts b/editor/lib/agent-chat/build-agent-send.test.ts index 0de7a24c5..b40b3cf74 100644 --- a/editor/lib/agent-chat/build-agent-send.test.ts +++ b/editor/lib/agent-chat/build-agent-send.test.ts @@ -91,3 +91,42 @@ describe("buildAgentSend", () => { ).toBe(false); }); }); + +describe("buildAgentSend — endpoint provider pin (#806)", () => { + it("rides provider_id when the picked model is a registered endpoint model", () => { + const sendMessage = vi.fn(); + const send = buildAgentSend({ + sendMessage, + sessionId: "s1", + modelId: "llama3.1:8b", + providerId: "ollama", + }); + + send("hi"); + + expect(sendMessage).toHaveBeenCalledWith( + { text: "hi" }, + { + body: { + session_id: "s1", + model_id: "llama3.1:8b", + provider_id: "ollama", + }, + } + ); + }); + + it("omits provider_id for catalog models (BYOK cascade stays in charge)", () => { + const sendMessage = vi.fn(); + const send = buildAgentSend({ + sendMessage, + sessionId: "s1", + modelId: "anthropic/claude-sonnet-4.6", + }); + + send("hi"); + + const body = sendMessage.mock.calls[0][1]?.body; + expect(body).not.toHaveProperty("provider_id"); + }); +}); diff --git a/editor/lib/agent-chat/build-agent-send.ts b/editor/lib/agent-chat/build-agent-send.ts index 92c28b174..a3f556cff 100644 --- a/editor/lib/agent-chat/build-agent-send.ts +++ b/editor/lib/agent-chat/build-agent-send.ts @@ -16,6 +16,13 @@ import type { AgentMode } from "@grida/agent"; 
export type AgentSendBody = { session_id?: string; model_id: string; + /** + * Explicit provider pick (issue #806). Set when the chosen model is a + * registered endpoint model — provider resolution otherwise cascades + * BYOK-first, and a stored OpenRouter key would swallow a local model + * id it cannot serve. Omitted for catalog models (cascade is correct). + */ + provider_id?: string; /** Permission/supervision posture for the turn (RFC `permission modes`). */ mode?: AgentMode; /** Per-send skill subset (workspace tab); omitted on tab-less surfaces. */ @@ -32,15 +39,18 @@ export function buildAgentSend(opts: { sendMessage: SendMessageFn; sessionId: string | null; modelId: string; + /** Endpoint provider id serving `modelId`, when it's a registered model. */ + providerId?: string; mode?: AgentMode; skills?: string[]; }): (text: string, files?: FileUIPart[]) => void { - const { sendMessage, sessionId, modelId, mode, skills } = opts; + const { sendMessage, sessionId, modelId, providerId, mode, skills } = opts; return (text, files) => { const body: AgentSendBody = { session_id: sessionId ?? undefined, model_id: modelId, }; + if (providerId) body.provider_id = providerId; if (mode) body.mode = mode; if (skills) body.skills = skills; void sendMessage(files && files.length > 0 ? 
{ text, files } : { text }, { diff --git a/editor/lib/agent-chat/web-daemon-bridge.ts b/editor/lib/agent-chat/web-daemon-bridge.ts index 7ede0dc6a..544c08d17 100644 --- a/editor/lib/agent-chat/web-daemon-bridge.ts +++ b/editor/lib/agent-chat/web-daemon-bridge.ts @@ -219,6 +219,13 @@ export function createWebDaemonBridge( set: (providerId, key) => client.secrets.set(providerId, key), delete: (providerId) => client.secrets.delete(providerId), }, + providers: { + list_endpoints: () => client.providers.list_endpoints(), + set_endpoint: (config) => client.providers.set_endpoint(config), + delete_endpoint: (id) => client.providers.delete_endpoint(id), + info: () => client.providers.info(), + probe_endpoint: (baseUrl) => client.providers.probe_endpoint(baseUrl), + }, agent: { run: (opts, onChunk) => diff --git a/editor/lib/desktop/bridge-boundary.test.ts b/editor/lib/desktop/bridge-boundary.test.ts index fbcdb61a3..74e2e607d 100644 --- a/editor/lib/desktop/bridge-boundary.test.ts +++ b/editor/lib/desktop/bridge-boundary.test.ts @@ -66,6 +66,7 @@ describe("/desktop bridge boundary", () => { "/secrets/has", "/secrets/set", "/secrets/delete", + "/providers/endpoints/", "/sessions", "/workspaces", "/files/", diff --git a/editor/lib/desktop/bridge.ts b/editor/lib/desktop/bridge.ts index 9e7316421..0228c6be3 100644 --- a/editor/lib/desktop/bridge.ts +++ b/editor/lib/desktop/bridge.ts @@ -21,13 +21,23 @@ import { BYOK_PROVIDER_METADATA, AGENT_TIERS, AGENT_SESSION_AGENT, + OLLAMA_ENDPOINT_PRESET, + mergeProbedModels, + resolveEndpointModel, + resolveEndpointModels, type AgentMode, type AgentUIMessageChunk, type AgentRunOptions, type ByokProviderId, + type ProviderId, type ChatMessageWithParts, type ChatSessionRow, type CreateSessionOptions, + type EndpointModelEntry, + type EndpointModelOverrides, + type EndpointModelSpec, + type EndpointProviderConfig, + type ProbedEndpointModel, type PatchSessionOptions, type RewindResult, type SessionListFilter, @@ -58,11 +68,21 @@ export { 
BYOK_PROVIDER_METADATA, AGENT_TIERS, AGENT_SESSION_AGENT, + OLLAMA_ENDPOINT_PRESET, + mergeProbedModels, + resolveEndpointModel, + resolveEndpointModels, + type EndpointModelEntry, + type EndpointModelOverrides, + type EndpointModelSpec, + type EndpointProviderConfig, + type ProbedEndpointModel, type AgentMode, type AgentUIMessageChunk, type AgentRunOptions, type ByokProviderId, type ByokProviderMetadata, + type ProviderId, type ChatMessageRow, type ChatMessageWithParts, type ChatModel, @@ -252,7 +272,11 @@ export namespace secrets { return BYOK_PROVIDER_METADATA; } - export async function hasKey(providerId: ByokProviderId): Promise { + // Provider ids here are BYOK ids OR configured endpoint ids (#806) — + // a keyed gateway stores its key under its endpoint id through these + // same helpers. The agent host validates membership; unknown ids 400. + + export async function hasKey(providerId: ProviderId): Promise { return await bridgeOrThrow().secrets.has(providerId); } @@ -263,7 +287,7 @@ export namespace secrets { * round-trip. */ export async function setKey( - providerId: ByokProviderId, + providerId: ProviderId, key: string ): Promise { if (key.trim().length === 0) { @@ -272,7 +296,7 @@ export namespace secrets { await bridgeOrThrow().secrets.set(providerId, key); } - export async function deleteKey(providerId: ByokProviderId): Promise { + export async function deleteKey(providerId: ProviderId): Promise { await bridgeOrThrow().secrets.delete(providerId); } @@ -287,9 +311,14 @@ export namespace secrets { * matches platform convention for destructive prompts. */ export async function confirmDeleteKey( - providerId: ByokProviderId + providerId: ProviderId, + /** Display name override — endpoint ids have no BYOK label. */ + displayLabel?: string ): Promise { - const label = BYOK_PROVIDER_LABELS[providerId]; + const label = + displayLabel ?? + BYOK_PROVIDER_LABELS[providerId as ByokProviderId] ?? 
+ providerId; const choice = await bridgeOrThrow().dialog.confirm({ message: `Remove ${label} key?`, detail: @@ -302,6 +331,95 @@ export namespace secrets { } } +/* ─────────────────────── providers namespace ─────────────────── */ + +/** + * Endpoint provider config (issue #806) — user-configured OpenAI- + * compatible endpoints (Ollama preset, self-hosted gateways). Plain + * readable config, unlike `secrets`: the renderer may list configs back. + * A keyed gateway stores its key via the `secrets` namespace under the + * endpoint's id; this namespace never carries credentials. + * + * The bridge field is OPTIONAL (older desktop binaries) — UI must gate + * on {@link providers.isSupported}. + */ +export namespace providers { + export function isSupported(): boolean { + return getDesktopBridge()?.providers != null; + } + + export async function listEndpoints(): Promise { + const bridge = bridgeOrThrow().providers; + if (!bridge) return []; + return await bridge.list_endpoints(); + } + + export async function setEndpoint( + config: EndpointProviderConfig + ): Promise { + const bridge = bridgeOrThrow().providers; + if (!bridge) throw new DesktopBridgeMissingError(); + await bridge.set_endpoint(config); + } + + export async function deleteEndpoint(id: string): Promise { + const bridge = bridgeOrThrow().providers; + if (!bridge) throw new DesktopBridgeMissingError(); + await bridge.delete_endpoint(id); + } + + /** + * Discover the models an endpoint serves. The fetch happens on the + * agent host — the renderer's origin cannot reach a local Ollama + * directly (CORS). Throws when the bridge predates the surface or the + * endpoint is unreachable; callers fall back to manual entry. 
+ */ + export async function probeEndpoint(baseUrl: string): Promise<{ + source: "ollama" | "openai"; + models: ProbedEndpointModel[]; + }> { + const bridge = bridgeOrThrow().providers; + if (!bridge?.probe_endpoint) throw new DesktopBridgeMissingError(); + return await bridge.probe_endpoint(baseUrl); + } + + /** + * Reveal `endpoints.json` (the hand-editable config — `overrides` for + * power users live there) in the OS file manager. Returns `false` when + * the surface isn't available (old binary, or the web daemon bridge + * which has no native shell) — callers hide the affordance. + */ + export async function revealConfigFile(): Promise { + if (!canRevealConfigFile()) return false; + const bridge = getDesktopBridge()!; + const { path } = await bridge.providers!.info(); + await bridge.shell.show_item_in_folder(path); + return true; + } + + /** Whether {@link revealConfigFile} can work in this host. */ + export function canRevealConfigFile(): boolean { + const bridge = getDesktopBridge(); + return Boolean(bridge?.providers?.info && bridge.caps.native.shell); + } + + /** + * Native confirm for the destructive "Remove endpoint" action — + * same convention as `secrets.confirmDeleteKey`. + */ + export async function confirmDeleteEndpoint(label: string): Promise { + const choice = await bridgeOrThrow().dialog.confirm({ + message: `Remove ${label}?`, + detail: + "The agent will stop using this endpoint and its registered models. 
You can add it back any time.", + buttons: ["Remove", "Cancel"], + default_id: 1, + cancel_id: 1, + }); + return choice === 0; + } +} + /* ───────────────────────── app namespace ────────────────────── */ export type DesktopAppInfo = { diff --git a/editor/scaffolds/desktop/ai-sidebar/chat.tsx b/editor/scaffolds/desktop/ai-sidebar/chat.tsx index eabb72ff3..4247629b1 100644 --- a/editor/scaffolds/desktop/ai-sidebar/chat.tsx +++ b/editor/scaffolds/desktop/ai-sidebar/chat.tsx @@ -31,7 +31,6 @@ import { } from "@app/ui/ai-elements/conversation"; import { cn } from "@app/ui/lib/utils"; import type { ComposerCatalog } from "@/kits/composer"; -import _models from "@grida/ai-models"; import { AGENT_SESSION_AGENT, sessions as bridgeSessions, @@ -61,9 +60,14 @@ import { import { ChatSessionPicker } from "../shared/chat-session-picker"; import { DesktopModelPicker, + ModelToolCallNotice, useModelPickerState, } from "../shared/model-picker"; import { DesktopContextMeter } from "../shared/context-meter"; +import { + registered_models, + useEndpointProviders, +} from "../shared/registered-models"; import { AgentComposerInput, type ComposerCommandAction, @@ -227,18 +231,24 @@ export function AISidebarChat({ className }: { className?: string }) { setMessages(chatSession.initial_messages); }, [chatSession.initial_messages, setMessages]); + // Configured endpoint providers (issue #806): their registered models + // join the picker and the capability gates below. + const endpoints = useEndpointProviders(); + // Flat model selection (ignores tiers). Seeds from the active // session's stored model and rides each send as `body.modelId`. const { model_id: modelId, setModelId } = useModelPickerState({ current_id: chatSession.current_id, sessions: chatSession.sessions, + endpoints, }); - // Whether the active model accepts image input — memoized so the catalog - // lookup doesn't re-scan on every render (only when the model changes). 
+ // Whether the active model accepts image input — memoized so the + // registry lookup doesn't re-scan on every render (only when the model + // or endpoint list changes). const multimodal = useMemo( - () => _models.text.modelSpecById(modelId)?.multimodal ?? false, - [modelId] + () => registered_models.resolve(modelId, endpoints)?.multimodal ?? false, + [modelId, endpoints] ); // The active session row carries the rolled-up cost the context meter @@ -272,6 +282,7 @@ export function AISidebarChat({ className }: { className?: string }) { sendMessage, sessionId: chatSession.current_id, modelId, + providerId: registered_models.providerIdForModel(modelId, endpoints), }), }); @@ -440,6 +451,8 @@ export function AISidebarChat({ className }: { className?: string }) { + +
- + } diff --git a/editor/scaffolds/desktop/shared/context-meter.tsx b/editor/scaffolds/desktop/shared/context-meter.tsx index 727ef42c1..7758985eb 100644 --- a/editor/scaffolds/desktop/shared/context-meter.tsx +++ b/editor/scaffolds/desktop/shared/context-meter.tsx @@ -17,9 +17,8 @@ import { useMemo } from "react"; import type { UIMessage } from "ai"; -// `@grida/ai-models` is the framework-free catalog (renderer-safe, unlike -// the `@/lib/ai/models` server seam) — same import the model picker uses. -import _models from "@grida/ai-models"; +import type { EndpointProviderConfig } from "@/lib/desktop/bridge"; +import { registered_models } from "./registered-models"; import { Button } from "@app/ui/components/button"; import { Popover, @@ -70,14 +69,23 @@ export function DesktopContextMeter({ messages, modelId, costUsd, + endpoints = [], }: { messages: UIMessage[]; - /** Active model id — its catalog spec supplies the context window. */ + /** Active model id — its resolved spec supplies the context window. */ modelId: string; /** Real session cost so far, in USD. Shown when > 0. */ costUsd?: number; + /** Configured endpoint providers (issue #806) — registered local models + * resolve their real (often small) windows through these. */ + endpoints?: readonly EndpointProviderConfig[]; }) { - const contextWindow = _models.text.modelSpecById(modelId)?.contextWindow; + // Memoized: chat panels re-render per streamed token, and resolve() + // rebuilds the flattened spec list each call. 
+ const contextWindow = useMemo( + () => registered_models.resolve(modelId, endpoints)?.contextWindow, + [modelId, endpoints] + ); const { usedTokens, maxTokens, diff --git a/editor/scaffolds/desktop/shared/model-picker.tsx b/editor/scaffolds/desktop/shared/model-picker.tsx index c0f87eebc..6ad33f926 100644 --- a/editor/scaffolds/desktop/shared/model-picker.tsx +++ b/editor/scaffolds/desktop/shared/model-picker.tsx @@ -1,5 +1,7 @@ /** - * Desktop model picker — flat list of every catalog model. + * Desktop model picker — flat list of every catalog model, plus any + * user-registered endpoint models (issue #806 — local Ollama, self- + * hosted gateways). * * The agent system is tier-based (4 tiers → 4 models), but the catalog * holds more models than the tiers map to, leaving some unreachable. @@ -11,7 +13,8 @@ "use client"; -import { useEffect, useRef, useState } from "react"; +import { useEffect, useMemo, useRef, useState } from "react"; +import { TriangleAlertIcon } from "lucide-react"; import { PromptInputSelect, PromptInputSelectContent, @@ -24,7 +27,11 @@ import { // providers (live keys) and is lint-blocked from the desktop renderer // (GRIDA-SEC-004). This package is pure data and renderer-safe. import _models, { TIER_MODEL_IDS } from "@grida/ai-models"; -import type { ChatSessionRow } from "@/lib/desktop/bridge"; +import type { + ChatSessionRow, + EndpointProviderConfig, +} from "@/lib/desktop/bridge"; +import { registered_models } from "./registered-models"; const catalog = _models.text.catalog; type CatalogId = _models.text.CatalogId; @@ -41,9 +48,13 @@ function isCatalogId(id: string | undefined | null): id is CatalogId { export function DesktopModelPicker({ value, onValueChange, + endpoints = [], }: { value: string; onValueChange: (modelId: string) => void; + /** Configured endpoint providers whose registered models join the list + * (grouped under the endpoint's label). 
*/ + endpoints?: readonly EndpointProviderConfig[]; }) { return ( @@ -60,11 +71,57 @@ export function DesktopModelPicker({ {_models.text.displayLabel(m)} ))} + {endpoints.map((endpoint) => + endpoint.models.map((m) => ( + + {m.label ?? m.id} + + {" "} + · {endpoint.label ?? endpoint.id} + + + )) + )} ); } +/** + * Inline notice for a selected model that is marked `tool_call: false` + * (issue #806). The agent loop is tool-heavy (files, commands, todos) — + * gating is deliberately permissive (the run is not blocked), so the + * honest move is a visible expectation-setter, not a hard stop. + */ +export function ModelToolCallNotice({ + model_id: modelId, + endpoints, +}: { + model_id: string; + endpoints: readonly EndpointProviderConfig[]; +}) { + // Memoized: this renders inside chat panels that re-render per streamed + // token, and resolve() rebuilds the flattened spec list each call. + const spec = useMemo( + () => registered_models.resolve(modelId, endpoints), + [modelId, endpoints] + ); + if (!spec || spec.tool_call) return null; + return ( +
+ + + {spec.label} is marked as not supporting tool calls — the agent's + file, command, and planning abilities may not work with it. + +
+ ); +} + /** * Model selection state for a chat panel. Defaults to * {@link DEFAULT_MODEL_ID} (or `initial`, when a caller seeds one — e.g. @@ -78,15 +135,26 @@ export function useModelPickerState({ current_id: currentId, sessions, initial, + endpoints = [], }: { current_id: string | null; sessions: ChatSessionRow[]; /** Initial selection, applied only on first mount. Falls back to - * {@link DEFAULT_MODEL_ID} when absent or not a known catalog id. */ + * {@link DEFAULT_MODEL_ID} when absent or not a known model id. */ initial?: string; + /** Configured endpoint providers — their registered model ids count as + * known, so a session that ran on a local model re-seeds correctly. */ + endpoints?: readonly EndpointProviderConfig[]; }): { model_id: string; setModelId: (id: string) => void } { + const registeredIds = useMemo( + () => new Set(registered_models.specs(endpoints).map((m) => m.id)), + [endpoints] + ); + const isKnownId = (id: string | undefined | null): id is string => + isCatalogId(id) || (typeof id === "string" && registeredIds.has(id)); + const [modelId, setModelId] = useState( - isCatalogId(initial) ? initial : DEFAULT_MODEL_ID + isKnownId(initial) ? initial : DEFAULT_MODEL_ID ); // The session id we last seeded from. Re-seed only when the active id // changes — `undefined` means "never seeded" so the first run fires. @@ -104,9 +172,18 @@ export function useModelPickerState({ // committing, so we don't lock in the default and skip the real seed. if (!row) return; const stored = row.model?.model_id; - if (isCatalogId(stored)) setModelId(stored); - seededFor.current = currentId; - }, [currentId, sessions]); + if (isKnownId(stored)) { + setModelId(stored); + seededFor.current = currentId; + return; + } + // Stored id not (yet) known. Endpoints load async — when the session + // ran on a registered local model, leave the seed open so the + // `registeredIds` dep can complete it once the endpoint list lands. 
+ // A session with NO stored model is seeded-done immediately. + if (!stored) seededFor.current = currentId; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [currentId, sessions, registeredIds]); return { model_id: modelId, setModelId }; } diff --git a/editor/scaffolds/desktop/shared/registered-models.ts b/editor/scaffolds/desktop/shared/registered-models.ts new file mode 100644 index 000000000..cc8499b86 --- /dev/null +++ b/editor/scaffolds/desktop/shared/registered-models.ts @@ -0,0 +1,79 @@ +/** + * Registered (endpoint) models in the desktop renderer — issue #806. + * + * One fetch surface + pure resolution helpers shared by the model + * picker, the capability gates (multimodal / tool_call), and the context + * meter, so every consumer resolves a model id the same way: static + * catalog ∪ user-registered endpoint models via + * `models.text.registry.resolve`. + */ + +"use client"; + +import { useEffect, useState } from "react"; +import _models from "@grida/ai-models"; +import { resolveEndpointModels } from "@grida/agent"; +import { + providers, + type EndpointModelSpec, + type EndpointProviderConfig, +} from "@/lib/desktop/bridge"; + +export namespace registered_models { + /** Flatten endpoint configs into the registry's custom-spec list — + * OVERRIDE-RESOLVED, mirroring the host's `registeredModels()`. */ + export function specs( + endpoints: readonly EndpointProviderConfig[] + ): EndpointModelSpec[] { + return endpoints.flatMap((endpoint) => resolveEndpointModels(endpoint)); + } + + /** Resolve a model id over catalog ∪ registered (normalized defaults). */ + export function resolve( + modelId: string, + endpoints: readonly EndpointProviderConfig[] + ): _models.text.registry.ResolvedModelSpec | undefined { + return _models.text.registry.resolve(modelId, specs(endpoints)); + } + + /** + * The endpoint provider id serving `modelId`, or `undefined` for + * catalog models. 
Rides each send as `provider_id` so an explicit + * local-model pick can't be swallowed by the BYOK-first cascade (a + * stored OpenRouter key cannot serve `llama3.1:8b`). + */ + export function providerIdForModel( + modelId: string, + endpoints: readonly EndpointProviderConfig[] + ): string | undefined { + return endpoints.find((endpoint) => + endpoint.models.some((m) => m.id === modelId) + )?.id; + } +} + +/** + * The configured endpoint providers, fetched once per mount. `[]` while + * loading, outside the desktop renderer, or on an old binary without the + * bridge surface — every consumer degrades to catalog-only behavior. + */ +export function useEndpointProviders(): EndpointProviderConfig[] { + const [endpoints, setEndpoints] = useState([]); + useEffect(() => { + let cancelled = false; + if (!providers.isSupported()) return; + providers + .listEndpoints() + .then((list) => { + if (!cancelled) setEndpoints(list); + }) + .catch(() => { + // Endpoint config is additive — a failed fetch degrades to + // catalog-only models, never blocks the chat. 
+ }); + return () => { + cancelled = true; + }; + }, []); + return endpoints; +} diff --git a/editor/scaffolds/desktop/workbench/agent-pane.tsx b/editor/scaffolds/desktop/workbench/agent-pane.tsx index 2ed15cb72..fc48213f7 100644 --- a/editor/scaffolds/desktop/workbench/agent-pane.tsx +++ b/editor/scaffolds/desktop/workbench/agent-pane.tsx @@ -50,7 +50,6 @@ import { type WelcomeHandoff, } from "@/lib/desktop/welcome-handoff"; import { useDesktopAgentFocusSession } from "@/lib/desktop/agent-focus-session"; -import _models from "@grida/ai-models"; import { buildAgentSend, buildApprovalResumeBody, @@ -76,10 +75,15 @@ import { QueuedMessages } from "../shared/queued-messages"; import { ChatSessionPicker } from "../shared/chat-session-picker"; import { DesktopModelPicker, + ModelToolCallNotice, useModelPickerState, } from "../shared/model-picker"; import { DesktopModePicker, useModePickerState } from "../shared/mode-picker"; import { DesktopContextMeter } from "../shared/context-meter"; +import { + registered_models, + useEndpointProviders, +} from "../shared/registered-models"; import { AgentComposerInput, type ComposerCommandAction, @@ -345,6 +349,10 @@ function AgentPaneContent({ setMessages(chatSession.initial_messages); }, [chatSession.initial_messages, setMessages]); + // Configured endpoint providers (issue #806): their registered models + // join the picker and the capability gates below. + const endpoints = useEndpointProviders(); + // Flat model selection (ignores tiers). Seeds from the welcome // composer's pick on a handed-off fresh session, otherwise from the // active session's stored model, and rides each send as `body.modelId`. @@ -352,6 +360,7 @@ function AgentPaneContent({ current_id: chatSession.current_id, sessions: chatSession.sessions, initial: handoff?.model_id, + endpoints, }); // Permission/supervision posture (RFC `permission modes`). 
Seeds from the @@ -361,11 +370,12 @@ function AgentPaneContent({ sessions: chatSession.sessions, }); - // Whether the active model accepts image input — memoized so the catalog - // lookup doesn't re-scan on every render (only when the model changes). + // Whether the active model accepts image input — memoized so the + // registry lookup doesn't re-scan on every render (only when the model + // or endpoint list changes). const multimodal = useMemo( - () => _models.text.modelSpecById(modelId)?.multimodal ?? false, - [modelId] + () => registered_models.resolve(modelId, endpoints)?.multimodal ?? false, + [modelId, endpoints] ); // The active session row carries the rolled-up cost the context meter @@ -395,6 +405,10 @@ function AgentPaneContent({ // the optimistic mirror, shared with `ai-sidebar/chat.tsx`. Skills ride the // live `send` from the active tab; a core-drained turn uses the session's // discovered skills (no per-send subset — the renderer has no tab there). + // Endpoint provider pin for the active model (issue #806) — rides every + // run-entering body: normal sends AND approval resumes below. + const providerId = registered_models.providerIdForModel(modelId, endpoints); + const { queued, cancel: cancelQueued, @@ -408,6 +422,7 @@ function AgentPaneContent({ sendMessage, sessionId: chatSession.current_id, modelId, + providerId, mode, skills: skillsForActiveTab(activeRelPath), }), @@ -540,6 +555,7 @@ function AgentPaneContent({ body: buildApprovalResumeBody({ session_id: chatSession.current_id ?? 
undefined, model_id: modelId, + provider_id: providerId, mode, tool_call_id: pending.toolCallId, approval_id: pending.approvalId, @@ -547,7 +563,7 @@ function AgentPaneContent({ }), }); }, - [chat, chatSession.current_id, modelId, mode] + [chat, chatSession.current_id, modelId, providerId, mode] ); // A pending supervised approval (the model called a mutating command in @@ -617,6 +633,8 @@ function AgentPaneContent({ + + {/* Hidden while the session is busy: clicking Allow/Deny starts the resume turn (busy → true), so the bar vanishes on click — instant feedback, no optimistic message mutation needed. */} @@ -636,11 +654,16 @@ function AgentPaneContent({ toolbar={ <> - + } diff --git a/packages/grida-ai-agent/README.md b/packages/grida-ai-agent/README.md index e5efedf96..ccfaba3f8 100644 --- a/packages/grida-ai-agent/README.md +++ b/packages/grida-ai-agent/README.md @@ -43,11 +43,14 @@ public subpath; workspace bindings use it in-process. The perimeter that keeps this package small. A feature request that crosses one of these is the wrong tool, not a missing feature. -- **Not a general model-provider router.** V1 provider selection is - BYOK-only and isolated to the node-only `providers/` layer - (OpenRouter → AI Gateway). The agent + runtime core never import +- **Not a general model-provider router.** Provider selection is + isolated to the node-only `providers/` layer: the BYOK key slots + (OpenRouter → AI Gateway) plus ONE generalized OpenAI-compatible + endpoint type (`{base_url, optional key, registered models}` — Ollama + is the preset; issue #806). The agent + runtime core never import selection; they receive a resolved `ModelFactory`. There is no - registry for arbitrary third-party providers. + registry for arbitrary third-party providers — new hosted providers are + new BYOK slots, not config. - **Not a hosted model gateway.** The package does not proxy model calls through grida.co, own OAuth sessions, or mint hosted provider tokens. 
- **Not a billing or entitlement engine.** The package forwards per-step diff --git a/packages/grida-ai-agent/src/__public-api__.test.ts b/packages/grida-ai-agent/src/__public-api__.test.ts index 74bd82854..7929d5f57 100644 --- a/packages/grida-ai-agent/src/__public-api__.test.ts +++ b/packages/grida-ai-agent/src/__public-api__.test.ts @@ -180,6 +180,34 @@ describe("@grida/agent public API", () => { expect(row.agent).toBe("grida"); }); + it("exposes the endpoint-provider contract (issue #806)", () => { + expect(root.OLLAMA_ENDPOINT_PRESET).toEqual({ + id: "ollama", + label: "Ollama", + base_url: "http://localhost:11434/v1", + }); + expect(typeof root.isValidEndpointProviderId).toBe("function"); + expect(typeof root.validateEndpointProviderConfig).toBe("function"); + expect(typeof root.mergeProbedModels).toBe("function"); + expect(root.isByokProviderId("openrouter")).toBe(true); + expect(root.isByokProviderId("ollama")).toBe(false); + const config: root.EndpointProviderConfig = { + ...root.OLLAMA_ENDPOINT_PRESET, + models: [{ id: "llama3.1:8b", tool_call: true }], + }; + const model: root.EndpointModelSpec = config.models[0]; + expect(model.id).toBe("llama3.1:8b"); + // The model-id and provider-id wire types are open: a registered + // local id type-checks (the runtime gate still validates it). 
+ const localModel: AgentModelId = "llama3.1:8b"; + const localRun: AgentRunOptions = { + messages: [], + provider_id: "ollama", + model_id: localModel, + }; + expect(localRun.provider_id).toBe("ollama"); + }); + it("does not expose internal runtime/provider/server modules from the root", () => { expect("AgentRuntime" in root).toBe(false); expect("StreamRegistry" in root).toBe(false); diff --git a/packages/grida-ai-agent/src/http/routes/handshake.ts b/packages/grida-ai-agent/src/http/routes/handshake.ts index 708288ef3..b6ef3f823 100644 --- a/packages/grida-ai-agent/src/http/routes/handshake.ts +++ b/packages/grida-ai-agent/src/http/routes/handshake.ts @@ -30,6 +30,7 @@ const SUPPORTS_TAGS: Record = { agent: "agent@1", workspaces: "workspaces@1", sessions: "sessions@1", + providers: "providers@1", shell: "shell@1", }; diff --git a/packages/grida-ai-agent/src/http/routes/providers.ts b/packages/grida-ai-agent/src/http/routes/providers.ts new file mode 100644 index 000000000..a0fd5519b --- /dev/null +++ b/packages/grida-ai-agent/src/http/routes/providers.ts @@ -0,0 +1,126 @@ +/** + * GRIDA-SEC-004 — `/providers/endpoints/*` routes (issue #806). + * + * CRUD over the endpoint provider config store: user-configured + * OpenAI-compatible endpoints (Ollama preset, self-hosted gateways). + * + * Unlike `/secrets/*`, configs ARE readable back to the client — an + * endpoint config is plain config (base URL + registered models), not a + * credential. The optional API key for a keyed gateway still rides the + * `/secrets/*` surface under the endpoint's id and never appears here. + * + * Threat note (reviewed): `base_url` is user-controlled egress — once an + * endpoint is configured and picked, conversation content flows to it. + * That is the feature (same trust model as BYOK: the desktop user points + * their own agent at their own endpoint), and the writer is the same + * authenticated loopback client that could already set a BYOK key. 
/*
 * The validator pins the shape (http(s) URL, bounded sizes) so a config
 * write can't smuggle arbitrary blobs.
 */

import type { Hono } from "hono";
import {
  isValidEndpointProviderId,
  parseEndpointBaseUrl,
  validateEndpointProviderConfig,
  type EndpointProviderConfig,
} from "../../protocol/endpoints";
import type { EndpointProvidersStore } from "../../providers/endpoints";
import { probeEndpointModels } from "../../providers/probe";
import type { SecretsStore } from "../../secrets";
import { body, v } from "../validate";

export type ProvidersRoutesDeps = {
  endpoints: EndpointProvidersStore;
  /**
   * When present, deleting an endpoint also deletes the key stored under
   * its id. Without this, the key would be orphaned in auth.json: the
   * `/secrets/*` allowlist only accepts CONFIGURED endpoint ids, so the
   * leftover would be undeletable — and re-creating the same endpoint id
   * later would silently reuse the stale credential.
   */
  secrets?: SecretsStore;
  /** Probe override for tests. Defaults to {@link probeEndpointModels}. */
  probe?: typeof probeEndpointModels;
};

/**
 * Register the `/providers/endpoints/*` routes (list, info, set, delete,
 * probe) on `app`.
 *
 * @param app  Hono app the routes are attached to.
 * @param deps Endpoint store, plus the optional secrets store (for key
 *             cleanup on delete) and an optional probe override.
 */
export function registerProvidersRoutes(app: Hono, deps: ProvidersRoutesDeps) {
  const { endpoints, secrets } = deps;
  const probe = deps.probe ?? probeEndpointModels;

  app.post("/providers/endpoints/list", async (c) => {
    const list: EndpointProviderConfig[] = await endpoints.list();
    return c.json(list);
  });

  // Where the config JSON lives — the settings UI links developers to
  // the hand-editable file (the `overrides` escape hatch lives there).
  // Absolute paths are an accepted part of this surface (cf. workspaces).
  app.post("/providers/endpoints/info", (c) =>
    c.json({ path: endpoints.filePath })
  );

  app.post("/providers/endpoints/set", async (c) => {
    // A missing or malformed JSON body falls through to the validator,
    // which rejects `undefined` with a 400.
    let raw: unknown;
    try {
      raw = await c.req.json();
    } catch {
      raw = undefined;
    }
    const config = (raw as { config?: unknown } | undefined)?.config;
    const result = validateEndpointProviderConfig(config);
    if (!result.ok) {
      return c.json({ error: `config ${result.error}` }, 400);
    }
    try {
      await endpoints.set(result.config);
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      // The store's own rejections (re-validation, entry cap) are client
      // errors; anything else is a persistence failure (disk full, no
      // write permission) — the payload wasn't the problem.
      if (message.startsWith("[agent-host-endpoints]")) {
        return c.json({ error: message }, 400);
      }
      console.error(`[agent-host-providers] endpoint set failed: ${message}`);
      return c.json({ error: "failed to persist endpoint config" }, 500);
    }
    console.log(
      `[agent-host-providers] endpoint set id=${result.config.id} models=${result.config.models.length}`
    );
    return c.json({ ok: true });
  });

  app.post("/providers/endpoints/delete", async (c) => {
    const r = await body(c, { id: v.string });
    if (!r.ok) return r.res;
    // The id doubles as the secrets key below, so it must be endpoint-shaped
    // before anything is deleted. Without this guard a request such as
    // `{ id: "openrouter" }` would fall through to `secrets.delete` and
    // remove a BYOK key through a route that is only meant to clean up
    // endpoint keys. `isValidEndpointProviderId` rejects BYOK ids.
    if (!isValidEndpointProviderId(r.data.id)) {
      return c.json({ error: "id must be a valid endpoint provider id" }, 400);
    }
    await endpoints.delete(r.data.id);
    // The endpoint's key (if any) goes with it — see the deps doc. Both
    // deletes are idempotent, so a partial failure is safe to retry.
    await secrets?.delete(r.data.id);
    console.log(`[agent-host-providers] endpoint delete id=${r.data.id}`);
    return c.json({ ok: true });
  });

  // Model discovery (see providers/probe.ts for the threat note): the
  // host fetches the endpoint's own model listing and returns the
  // PARSED rows — never the raw body. Takes a base_url (not a stored
  // id) so the settings flow can prefill before the config is saved.
  app.post("/providers/endpoints/probe", async (c) => {
    const r = await body(c, { base_url: v.string });
    if (!r.ok) return r.res;
    // Malformed input is the caller's fault (400); only a well-formed
    // URL that doesn't answer is an upstream failure (502).
    const parsed = parseEndpointBaseUrl(r.data.base_url);
    if (!parsed.ok) {
      return c.json({ error: parsed.error }, 400);
    }
    const result = await probe(parsed.base_url);
    if (!result.ok) {
      return c.json({ error: result.error }, 502);
    }
    console.log(
      `[agent-host-providers] probe source=${result.source} models=${result.models.length}`
    );
    return c.json({ source: result.source, models: result.models });
  });
}
*/ + endpoints?: EndpointProvidersStore; }; export function registerSecretsRoutes(app: Hono, deps: SecretsRoutesDeps) { - const { store } = deps; + const { store, endpoints } = deps; + + const allowedProviderId = async ( + id: string + ): Promise<{ ok: true } | { ok: false; res: Response }> => { + if (await isKnownProviderId(id, endpoints)) return { ok: true }; + return { + ok: false, + res: Response.json( + { + error: `provider_id must be one of: ${BYOK_PROVIDER_IDS.join(", ")}, or a configured endpoint id`, + }, + { status: 400 } + ), + }; + }; app.post("/secrets/has", async (c) => { - const r = await body(c, { provider_id: v.oneOf(BYOK_PROVIDER_IDS) }); + const r = await body(c, { provider_id: v.string }); if (!r.ok) return r.res; + const allowed = await allowedProviderId(r.data.provider_id); + if (!allowed.ok) return allowed.res; return c.json({ has: await store.has(r.data.provider_id) }); }); app.post("/secrets/set", async (c) => { const r = await body(c, { - provider_id: v.oneOf(BYOK_PROVIDER_IDS), + provider_id: v.string, key: v.stringAllowEmpty, }); if (!r.ok) return r.res; + const allowed = await allowedProviderId(r.data.provider_id); + if (!allowed.ok) return allowed.res; if (r.data.key.trim().length === 0) { return c.json({ error: "key must not be empty or whitespace-only" }, 400); } @@ -57,8 +85,10 @@ export function registerSecretsRoutes(app: Hono, deps: SecretsRoutesDeps) { }); app.post("/secrets/delete", async (c) => { - const r = await body(c, { provider_id: v.oneOf(BYOK_PROVIDER_IDS) }); + const r = await body(c, { provider_id: v.string }); if (!r.ok) return r.res; + const allowed = await allowedProviderId(r.data.provider_id); + if (!allowed.ok) return allowed.res; await store.delete(r.data.provider_id); console.log(`[agent-host-secrets] delete providerId=${r.data.provider_id}`); return c.json({ ok: true }); diff --git a/packages/grida-ai-agent/src/http/server.ts b/packages/grida-ai-agent/src/http/server.ts index d735d9ca2..413352665 100644 --- 
a/packages/grida-ai-agent/src/http/server.ts +++ b/packages/grida-ai-agent/src/http/server.ts @@ -12,6 +12,7 @@ import { import { registerFilesRoutes } from "./routes/files"; import { registerRecentRoutes } from "./routes/recent"; import { registerSecretsRoutes } from "./routes/secrets"; +import { registerProvidersRoutes } from "./routes/providers"; import { registerAgentRoutes } from "./routes/agent"; import { registerWorkspacesRoutes } from "./routes/workspaces"; import { registerSessionsRoutes } from "./routes/sessions"; @@ -19,6 +20,7 @@ import { FileRegistry } from "../files/registry"; import { RecentStore } from "../files/recent"; import { AuthStore } from "../auth/file"; import { SecretsStore } from "../secrets"; +import { EndpointProvidersStore } from "../providers/endpoints"; import { WorkspaceRegistry } from "../workspaces"; import { openSessionsDb } from "../session/db"; import { SessionsStore } from "../session/store"; @@ -118,6 +120,9 @@ export function buildServer(opts: ServerOptions): BuiltServer { const workspaceRegistry = new WorkspaceRegistry(opts.user_data_path); const authStore = new AuthStore(opts.user_data_path); const secretsStore = new SecretsStore(authStore); + // Endpoint provider configs (issue #806): plain config beside the + // secrets store, persisted at ${userData}/endpoints.json. + const endpointsStore = new EndpointProvidersStore(opts.user_data_path); // Chat sessions: SQLite at ${userData}/sessions.db. Opened once per // agent-host launch and closed via the returned cleanup. WAL mode in // sessions/db.ts lets a CLI inspector read concurrently. 
@@ -135,6 +140,13 @@ export function buildServer(opts: ServerOptions): BuiltServer { if (opts.capabilities.secrets) { registerSecretsRoutes(app, { store: secretsStore, + endpoints: endpointsStore, + }); + } + if (opts.capabilities.providers) { + registerProvidersRoutes(app, { + endpoints: endpointsStore, + secrets: secretsStore, }); } // Agent runtime owns the run loop + the in-flight stream registry. @@ -156,6 +168,7 @@ export function buildServer(opts: ServerOptions): BuiltServer { } const runtime = new AgentRuntime({ secrets: secretsStore, + endpoints: endpointsStore, workspace_registry: workspaceRegistry, sessions_store: sessionsStore, streams: opts.stream_registry, diff --git a/packages/grida-ai-agent/src/index.ts b/packages/grida-ai-agent/src/index.ts index 3ec428a91..e115c113f 100644 --- a/packages/grida-ai-agent/src/index.ts +++ b/packages/grida-ai-agent/src/index.ts @@ -5,9 +5,25 @@ export { BYOK_PROVIDER_METADATA, BYOK_PROVIDER_IDS, + isByokProviderId, type ByokProviderMetadata, type ByokProviderId, + type ProviderId, } from "./protocol/provider-ids"; +export { + OLLAMA_ENDPOINT_PRESET, + isValidEndpointProviderId, + mergeProbedModels, + resolveEndpointModel, + resolveEndpointModels, + validateEndpointProviderConfig, + type EndpointModelEntry, + type EndpointModelOverrides, + type EndpointModelSpec, + type EndpointProviderConfig, + type ProbedEndpointModel, + type ProbeMergeResult, +} from "./protocol/endpoints"; export { AGENT_SERVER_PROTOCOL, AGENT_SERVER_DEFAULT_CAPABILITIES, diff --git a/packages/grida-ai-agent/src/neutral-globals.d.ts b/packages/grida-ai-agent/src/neutral-globals.d.ts index 5511d4a05..3399ee702 100644 --- a/packages/grida-ai-agent/src/neutral-globals.d.ts +++ b/packages/grida-ai-agent/src/neutral-globals.d.ts @@ -6,3 +6,10 @@ declare const console: { declare function setTimeout(handler: () => void, timeout?: number): unknown; declare function clearTimeout(handle: unknown): void; + +/** WHATWG URL — universal (browsers + Node). 
Only the members the neutral + * surface touches (endpoint base_url validation). */ +declare class URL { + constructor(url: string, base?: string | URL); + protocol: string; +} diff --git a/packages/grida-ai-agent/src/protocol/endpoints.ts b/packages/grida-ai-agent/src/protocol/endpoints.ts new file mode 100644 index 000000000..6d45a34c7 --- /dev/null +++ b/packages/grida-ai-agent/src/protocol/endpoints.ts @@ -0,0 +1,427 @@ +/** + * Custom OpenAI-compatible endpoint providers (issue #806 — local LLMs). + * + * Client-safe identity + config contract for user-configured endpoints. + * Local **Ollama** is the flagship preset; any OpenAI-compatible gateway + * (LiteLLM, vLLM, an Azure-compatible proxy, …) fits the same shape. This + * is the package's ONE generalized endpoint-provider type — presets + * instantiate it; we deliberately do not grow an opencode-style + * config-declared provider registry (anti-goal: not a general + * model-provider router). + * + * An endpoint config is **plain config, not a secret**: a base URL plus + * the models the user registered for it. When a gateway needs an API key, + * the key lives in the `SecretsStore` under the endpoint's id (same + * presence/set/delete-only discipline as BYOK keys, GRIDA-SEC-003/004) — + * never inside this config, so the config can ride readable storage, + * routes, and the renderer bridge. + */ + +import type { models } from "@grida/ai-models"; +import { isByokProviderId } from "./provider-ids"; + +/** A model spec consumable by the open registry — `@grida/ai-models`' + * custom spec (cost optional, capability flags explicit). This is the + * RESOLVED shape; the stored shape is {@link EndpointModelEntry}. */ +export type EndpointModelSpec = models.text.registry.CustomModelSpec; + +/** + * Sticky human corrections for a model entry. 
Detection refresh NEVER + * writes these — they exist for the "the endpoint reports a wrong value" + * case and are set by hand-editing `endpoints.json` (or by the settings + * inputs shown when detection has nothing). Resolution order: + * override → detected → registry default. + */ +export type EndpointModelOverrides = Pick< + EndpointModelSpec, + "contextWindow" | "tool_call" | "multimodal" +>; + +/** + * A model as STORED on an endpoint config. The top-level capability + * fields (`tool_call`, `contextWindow`, `multimodal`) are + * detection-owned: probe refresh overwrites them freely. Human + * corrections live in {@link EndpointModelOverrides} so a refresh can + * never clobber them. Resolve with {@link resolveEndpointModel} before + * feeding the registry. + */ +export type EndpointModelEntry = EndpointModelSpec & { + overrides?: EndpointModelOverrides; +}; + +/** + * A user-configured OpenAI-compatible endpoint provider. + * + * Resolvable (usable for a run) only when `models` is non-empty — an + * endpoint saved with just a base URL is valid config but not a provider + * the resolver will pick. + */ +export type EndpointProviderConfig = { + /** Stable id (`ollama`, `litellm`, …). See {@link ENDPOINT_PROVIDER_ID_PATTERN}. */ + id: string; + /** Display label. Falls back to the id. */ + label?: string; + /** OpenAI-compatible base URL, e.g. `http://localhost:11434/v1`. */ + base_url: string; + /** Models this endpoint serves. */ + models: EndpointModelEntry[]; + /** + * The model every tier resolves to when a run doesn't pick an explicit + * model (the agent's tier→catalog map is meaningless to a local + * endpoint — background subagents like the titler/compactor must land + * on a model this endpoint actually serves). Defaults to `models[0]`. + */ + default_model_id?: string; +}; + +/** Apply {@link EndpointModelOverrides} onto the detected fields — + * override → detected (→ registry default downstream). 
/**
 * Apply {@link EndpointModelOverrides} onto the detected fields —
 * override → detected (→ registry default downstream).
 *
 * @param entry Stored model entry, possibly carrying `overrides`.
 * @returns The entry without its `overrides` key, with each overridable
 *          field taken from the override when one is set.
 */
export function resolveEndpointModel(
  entry: EndpointModelEntry
): EndpointModelSpec {
  const { overrides, ...detected } = entry;
  return {
    ...detected,
    // `??` (not `||`) so an explicit `false` / `0` override still wins.
    contextWindow: overrides?.contextWindow ?? detected.contextWindow,
    tool_call: overrides?.tool_call ?? detected.tool_call,
    multimodal: overrides?.multimodal ?? detected.multimodal,
  };
}

/** All of an endpoint's models, override-resolved — the custom half of
 * the model-registry seam. */
export function resolveEndpointModels(
  config: EndpointProviderConfig
): EndpointModelSpec[] {
  return config.models.map(resolveEndpointModel);
}

/**
 * The model a model_id-less run on this endpoint executes — explicit
 * `default_model_id`, falling back to the first registered model. THE
 * one source of the default-model rule: the provider factory and the
 * runtime's limits resolution must agree on it, or compaction limits get
 * computed for a different model than the one that actually runs.
 * `undefined` ⇔ the endpoint has no models and is not resolvable.
 */
export function endpointDefaultModelId(
  config: EndpointProviderConfig
): string | undefined {
  return config.default_model_id ?? config.models[0]?.id;
}

/**
 * The Ollama preset — the "no signup, no key" path. `ollama serve`
 * exposes an OpenAI-compatible API at this base URL; no API key exists
 * or is sent.
 */
export const OLLAMA_ENDPOINT_PRESET = {
  id: "ollama",
  label: "Ollama",
  base_url: "http://localhost:11434/v1",
} as const;

/**
 * A model discovered by probing an endpoint (issue #806 — `POST
 * /providers/endpoints/probe`). Carries only what the endpoint actually
 * REPORTS: Ollama's `/api/tags` exposes ids + capability tags,
 * `/api/ps` / `/api/show` expose the context window; a generic
 * OpenAI-compatible `/models` exposes ids only.
 */
export type ProbedEndpointModel = {
  id: string;
  /** Whether the endpoint reports native tool-calling support. Absent
   * when the endpoint doesn't expose capabilities. */
  tool_call?: boolean;
  /**
   * Context window in tokens, when the endpoint reports one. For a
   * LOADED Ollama model this is the server's actual allocation
   * (`/api/ps` `context_length`); otherwise the model's maximum
   * (`/api/show` `model_info`). Absent when neither reports.
   */
  contextWindow?: number;
};

export type ProbeMergeResult = {
  models: EndpointModelEntry[];
  /** Count of models the probe found that the config didn't know
   * (appended at the end, detection fields prefilled). */
  discovered: number;
  /** Count of existing entries whose detected fields changed. */
  updated: number;
};

/**
 * Apply a probe result onto an endpoint's stored models — the executable
 * form of the detection-owned contract on {@link EndpointModelEntry}:
 * probed values overwrite the top-level detected fields (a silent probe —
 * e.g. an ids-only gateway — keeps the previous detection), `overrides`
 * are NEVER written, and models the probe discovered are appended.
 * Pure; shared by every surface that refreshes detection.
 *
 * A probe that reports the same id more than once is collapsed to one row
 * per id (the last report wins), so the merged list never contains
 * duplicate model ids.
 *
 * @param models Currently stored entries, in display order.
 * @param probed Rows reported by the endpoint.
 * @returns The merged list plus counts of discovered and updated entries.
 */
export function mergeProbedModels(
  models: readonly EndpointModelEntry[],
  probed: readonly ProbedEndpointModel[]
): ProbeMergeResult {
  // A Map keeps first-insertion key order and the last value written per
  // key, which is exactly the "one row per id, latest report wins" rule.
  const probedById = new Map(probed.map((m) => [m.id, m] as const));
  let updated = 0;
  const refreshed = models.map((m): EndpointModelEntry => {
    const p = probedById.get(m.id);
    if (!p) return m;
    const next: EndpointModelEntry = {
      ...m,
      // A field the probe is silent on keeps its previous detection.
      tool_call: p.tool_call ?? m.tool_call,
      contextWindow: p.contextWindow ?? m.contextWindow,
    };
    if (
      next.contextWindow !== m.contextWindow ||
      next.tool_call !== m.tool_call
    ) {
      updated += 1;
    }
    return next;
  });
  const known = new Set(models.map((m) => m.id));
  // Walk the de-duplicated map rather than `probed`: appending a repeated
  // id twice would yield a model list with duplicate ids.
  const discovered = [...probedById.values()]
    .filter((m) => !known.has(m.id))
    .map(
      (m): EndpointModelEntry => ({
        id: m.id,
        tool_call: m.tool_call,
        contextWindow: m.contextWindow,
      })
    );
  return {
    models: [...refreshed, ...discovered],
    discovered: discovered.length,
    updated,
  };
}

/**
 * Endpoint ids: short lowercase slugs. Must not collide with the BYOK
 * provider ids — both share the provider-id namespace on sessions,
 * run options, and the secrets store.
 */
const ENDPOINT_PROVIDER_ID_PATTERN = /^[a-z][a-z0-9_-]{0,31}$/;

/** True when `id` matches the slug pattern and is not a BYOK provider id. */
export function isValidEndpointProviderId(id: string): boolean {
  return ENDPOINT_PROVIDER_ID_PATTERN.test(id) && !isByokProviderId(id);
}
/** Bounds that keep a config a config (not an unbounded blob). */
const MAX_MODELS = 64;
const MAX_MODEL_ID_LEN = 128;
const MAX_LABEL_LEN = 64;
const MAX_BASE_URL_LEN = 2048;
const MAX_TOKEN_LIMIT = 100_000_000;

/**
 * Narrow + pin an endpoint base URL: http(s) only. Shared by the config
 * validator and the probe so the two boundaries can't drift. `base_url`
 * is the TRIMMED input string (whitespace padding would survive `new
 * URL` parsing yet break the string-concatenated request base later) —
 * but never `url.href`, no other normalization surprises.
 *
 * @param raw Untrusted value (request body field or stored config field).
 * @returns `{ ok: true, base_url, url }` with the trimmed string and its
 *          parsed form, or `{ ok: false, error }` naming the rule that failed.
 */
export function parseEndpointBaseUrl(
  raw: unknown
): { ok: true; base_url: string; url: URL } | { ok: false; error: string } {
  if (typeof raw !== "string") {
    return { ok: false, error: "base_url must be a string" };
  }
  // Length is checked on the raw input, before any parsing work, and is
  // reported as a length problem rather than a type problem.
  if (raw.length > MAX_BASE_URL_LEN) {
    return {
      ok: false,
      error: `base_url must be a string ≤ ${MAX_BASE_URL_LEN} characters`,
    };
  }
  const trimmed = raw.trim();
  if (trimmed.length === 0) {
    return { ok: false, error: "base_url must be a valid URL" };
  }
  let url: URL;
  try {
    url = new URL(trimmed);
  } catch {
    return { ok: false, error: "base_url must be a valid URL" };
  }
  if (url.protocol !== "http:" && url.protocol !== "https:") {
    return { ok: false, error: "base_url must be http(s)" };
  }
  return { ok: true, base_url: trimmed, url };
}

export type EndpointConfigValidation =
  | { ok: true; config: EndpointProviderConfig }
  | { ok: false; error: string };
/**
 * Narrow an untrusted value to an {@link EndpointProviderConfig}.
 *
 * Shared by the store (load-time hygiene) and the HTTP route (write-time
 * 400s), so a config that persisted always re-validates. Returns a fresh
 * object holding only known fields — unknown keys are dropped, never
 * round-tripped.
 *
 * @param raw Untrusted value (parsed JSON).
 * @returns `{ ok: true, config }` or `{ ok: false, error }` for the first
 *          rule that fails.
 */
export function validateEndpointProviderConfig(
  raw: unknown
): EndpointConfigValidation {
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
    return { ok: false, error: "config must be an object" };
  }
  const c = raw as Record<string, unknown>;

  if (typeof c.id !== "string" || !isValidEndpointProviderId(c.id)) {
    return {
      ok: false,
      error:
        "id must be a short lowercase slug and must not collide with a BYOK provider id",
    };
  }

  if (
    c.label !== undefined &&
    (typeof c.label !== "string" || c.label.length > MAX_LABEL_LEN)
  ) {
    return { ok: false, error: `label must be a string ≤ ${MAX_LABEL_LEN}` };
  }

  const baseUrl = parseEndpointBaseUrl(c.base_url);
  if (!baseUrl.ok) return baseUrl;

  if (!Array.isArray(c.models) || c.models.length > MAX_MODELS) {
    return { ok: false, error: `models must be an array of ≤ ${MAX_MODELS}` };
  }
  const modelSpecs: EndpointModelEntry[] = [];
  // Ids seen so far: rejects duplicates and backs the default_model_id check.
  const seen = new Set<string>();
  for (const m of c.models) {
    const validated = validateModelEntry(m);
    if (!validated.ok) return validated;
    if (seen.has(validated.entry.id)) {
      return { ok: false, error: `duplicate model id: ${validated.entry.id}` };
    }
    seen.add(validated.entry.id);
    modelSpecs.push(validated.entry);
  }

  let defaultModelId: string | undefined;
  if (c.default_model_id !== undefined) {
    if (
      typeof c.default_model_id !== "string" ||
      !seen.has(c.default_model_id)
    ) {
      return {
        ok: false,
        error: "default_model_id must name one of the registered models",
      };
    }
    defaultModelId = c.default_model_id;
  }

  return {
    ok: true,
    config: {
      id: c.id,
      // An empty label is treated as absent.
      label: typeof c.label === "string" && c.label ? c.label : undefined,
      base_url: baseUrl.base_url,
      models: modelSpecs,
      default_model_id: defaultModelId,
    },
  };
}

type ModelEntryValidation =
  | { ok: true; entry: EndpointModelEntry }
  | { ok: false; error: string };

/**
 * Narrow one untrusted `models[]` element to an {@link EndpointModelEntry},
 * copying only the known fields.
 */
function validateModelEntry(raw: unknown): ModelEntryValidation {
  if (!raw || typeof raw !== "object" || Array.isArray(raw)) {
    return { ok: false, error: "model must be an object" };
  }
  const m = raw as Record<string, unknown>;
  if (
    typeof m.id !== "string" ||
    m.id.length === 0 ||
    m.id.length > MAX_MODEL_ID_LEN
  ) {
    return {
      ok: false,
      error: `model id must be a non-empty string ≤ ${MAX_MODEL_ID_LEN}`,
    };
  }
  if (
    m.label !== undefined &&
    (typeof m.label !== "string" || m.label.length > MAX_LABEL_LEN)
  ) {
    return {
      ok: false,
      error: `model label must be a string ≤ ${MAX_LABEL_LEN}`,
    };
  }
  const flags = validateCapabilityFields(m, "model");
  if (!flags.ok) return flags;

  let overrides: EndpointModelOverrides | undefined;
  if (m.overrides !== undefined) {
    if (
      !m.overrides ||
      typeof m.overrides !== "object" ||
      Array.isArray(m.overrides)
    ) {
      return { ok: false, error: "model overrides must be an object" };
    }
    const o = m.overrides as Record<string, unknown>;
    // Overrides carry only the detection-owned fields — no `outputLimit`.
    const oFlags = validateCapabilityFields(o, "model overrides", [
      "contextWindow",
    ]);
    if (!oFlags.ok) return oFlags;
    // The casts are backed by the validateCapabilityFields check above.
    overrides = {
      multimodal: o.multimodal as boolean | undefined,
      tool_call: o.tool_call as boolean | undefined,
      contextWindow: o.contextWindow as number | undefined,
    };
    // An overrides object with nothing set is stored as absent.
    if (Object.values(overrides).every((v) => v === undefined)) {
      overrides = undefined;
    }
  }

  return {
    ok: true,
    entry: {
      id: m.id,
      label: typeof m.label === "string" && m.label ? m.label : undefined,
      multimodal: m.multimodal as boolean | undefined,
      tool_call: m.tool_call as boolean | undefined,
      contextWindow: m.contextWindow as number | undefined,
      outputLimit: m.outputLimit as number | undefined,
      overrides,
      // cost is intentionally not accepted from config input: a local/
      // self-hosted model is unmetered on this rail, and a user-supplied
      // price card would feed cost UI with invented numbers.
    },
  };
}

/**
 * Type-check the capability fields on `source`: `multimodal` / `tool_call`
 * must be booleans when present, and each field named in `limits` must be
 * an integer in `1..MAX_TOKEN_LIMIT` when present.
 *
 * @param source Object to inspect (a model entry or its `overrides`).
 * @param scope  Prefix for error messages, e.g. `"model"`.
 * @param limits Which numeric limit fields to check.
 */
function validateCapabilityFields(
  source: Record<string, unknown>,
  scope: string,
  limits: readonly ("contextWindow" | "outputLimit")[] = [
    "contextWindow",
    "outputLimit",
  ]
): { ok: true } | { ok: false; error: string } {
  for (const flag of ["multimodal", "tool_call"] as const) {
    if (source[flag] !== undefined && typeof source[flag] !== "boolean") {
      return { ok: false, error: `${scope} ${flag} must be a boolean` };
    }
  }
  for (const limit of limits) {
    const value = source[limit];
    if (value === undefined) continue;
    if (
      typeof value !== "number" ||
      !Number.isInteger(value) ||
      value <= 0 ||
      value > MAX_TOKEN_LIMIT
    ) {
      return {
        ok: false,
        // Name the upper bound too: a value above it is otherwise told it
        // "must be a positive integer" while already being one.
        error: `${scope} ${limit} must be a positive integer ≤ ${MAX_TOKEN_LIMIT}`,
      };
    }
  }
  return { ok: true };
}
*/ shell: boolean; }; @@ -24,6 +30,7 @@ export const AGENT_SERVER_DEFAULT_CAPABILITIES: AgentServerCapabilities = { agent: true, workspaces: true, sessions: true, + providers: true, shell: false, }; diff --git a/packages/grida-ai-agent/src/protocol/provider-ids.ts b/packages/grida-ai-agent/src/protocol/provider-ids.ts index bfa3b89da..78efd49e5 100644 --- a/packages/grida-ai-agent/src/protocol/provider-ids.ts +++ b/packages/grida-ai-agent/src/protocol/provider-ids.ts @@ -23,3 +23,15 @@ export type ByokProviderId = ByokProviderMetadata["id"]; export const BYOK_PROVIDER_IDS = BYOK_PROVIDER_METADATA.map( (provider) => provider.id ) as readonly ByokProviderId[]; + +export function isByokProviderId(id: string): id is ByokProviderId { + return (BYOK_PROVIDER_IDS as readonly string[]).includes(id); +} + +/** + * A provider id anywhere on the wire (run options, session rows, secrets): + * a BYOK id or a configured endpoint id (issue #806). `string & {}` keeps + * literal completion for the BYOK ids while admitting endpoint ids, which + * are user-chosen slugs validated at the boundary. + */ +export type ProviderId = ByokProviderId | (string & {}); diff --git a/packages/grida-ai-agent/src/protocol/run.ts b/packages/grida-ai-agent/src/protocol/run.ts index 8cd2be37c..ae9c837ac 100644 --- a/packages/grida-ai-agent/src/protocol/run.ts +++ b/packages/grida-ai-agent/src/protocol/run.ts @@ -4,7 +4,7 @@ */ import type { models, ModelTier } from "@grida/ai-models"; -import type { ByokProviderId } from "./provider-ids"; +import type { ProviderId } from "./provider-ids"; import type { SkillId } from "./skills"; import type { AgentMode } from "./mode"; @@ -24,7 +24,13 @@ export const AGENT_SESSION_AGENT = "grida" as const; */ export const GRIDA_SESSION_SSE_EVENT = "grida-session" as const; -export type AgentModelId = models.text.CatalogId; +/** + * A runnable model id: a catalog id, or a user-registered model id served + * by a configured endpoint provider (issue #806 — e.g. 
`llama3.1:8b` on + * Ollama). Open on the wire; the run-input boundary validates against + * catalog ∪ registered ids, so an arbitrary string still 400s. + */ +export type AgentModelId = models.text.CatalogId | (string & {}); export type AgentRunMessagePart = { type: string; @@ -64,7 +70,11 @@ export type AgentRunOptions = { * the one `tier` would resolve to. */ model_id?: AgentModelId; - provider_id?: ByokProviderId; + /** + * Explicit provider pick (issue #806). Validated server-side against + * the allowed set; an unknown id 400s. + */ + provider_id?: ProviderId; feature?: string; workspace_id?: string; skills?: readonly SkillId[]; diff --git a/packages/grida-ai-agent/src/providers/byok.ts b/packages/grida-ai-agent/src/providers/byok.ts index 1e60ce24d..bdb1a9bc9 100644 --- a/packages/grida-ai-agent/src/providers/byok.ts +++ b/packages/grida-ai-agent/src/providers/byok.ts @@ -25,6 +25,10 @@ export function makeOpenRouterFactory(apiKey: string): ModelFactory { baseURL: "https://openrouter.ai/api/v1", apiKey, headers: OPENROUTER_HEADERS, + // OpenAI-compat streams omit the usage chunk unless + // `stream_options.include_usage` is requested — without it every + // streamed run records zero tokens (no rollups, no context meter). + includeUsage: true, }); // Both OpenRouter and the catalog use Vercel-style `creator/model` // ids, so an explicit pick hands straight through; otherwise fall @@ -36,3 +40,31 @@ export function makeVercelFactory(apiKey: string): ModelFactory { const provider = createGateway({ apiKey }); return (tier, modelId) => provider(modelId ?? MODEL_BY_TIER[tier]); } + +/** + * Factory for a user-configured OpenAI-compatible endpoint (issue #806) — + * Ollama, LiteLLM, vLLM, any self-hosted gateway. The "no signup" trick + * is that `api_key` is OPTIONAL: when absent (Ollama) no Authorization + * header is sent, and that is not an error. + * + * Tier mapping: EVERY tier resolves to the endpoint's default model. 
The + * catalog's tier→id table (`anthropic/claude-…`) is meaningless to a + * local endpoint, and background subagents (titler, compactor) resolve + * tiers too — they must land on a model this endpoint actually serves. + */ +export function makeEndpointFactory(config: { + id: string; + base_url: string; + api_key?: string; + default_model_id: string; +}): ModelFactory { + const provider = createOpenAICompatible({ + name: config.id, + baseURL: config.base_url, + apiKey: config.api_key, + // Same as the OpenRouter factory: opt in to the streaming usage + // chunk, or streamed runs record zero tokens. + includeUsage: true, + }); + return (_tier, modelId) => provider(modelId ?? config.default_model_id); +} diff --git a/packages/grida-ai-agent/src/providers/endpoints.live.test.ts b/packages/grida-ai-agent/src/providers/endpoints.live.test.ts new file mode 100644 index 000000000..a8a3ba74d --- /dev/null +++ b/packages/grida-ai-agent/src/providers/endpoints.live.test.ts @@ -0,0 +1,325 @@ +/** + * LIVE end-to-end — endpoint providers against a REAL local Ollama + * (issue #806). The durability bar for "no signup, no key": a host with + * NO BYOK secret and one configured endpoint must run the agent end to + * end — provider resolution, the run loop, session persistence, the + * background titler, and a real server-side tool execution. + * + * Gated + excluded from CI (needs a local `ollama serve` + a pulled + * model). Run explicitly: + * + * GRIDA_LIVE_OLLAMA=1 \ + * pnpm --filter @grida/agent vitest run src/providers/endpoints.live.test.ts + * + * Env knobs: + * GRIDA_LIVE_OLLAMA=1 — required, opts in. + * GRIDA_LIVE_OLLAMA_MODEL — model id to use (default: first from /api/tags). + * GRIDA_LIVE_OLLAMA_URL — base URL (default: the Ollama preset). 
+ */ + +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeAll, beforeEach, describe, expect, it } from "vitest"; +import { Hono } from "hono"; +import { AuthStore } from "../auth/file"; +import { SecretsStore } from "../secrets"; +import { WorkspaceRegistry } from "../workspaces"; +import { openSessionsDb } from "../session/db"; +import { SessionsStore } from "../session/store"; +import { OLLAMA_ENDPOINT_PRESET } from "../protocol/endpoints"; +import { session_title } from "../session/title"; +import { AgentRuntime } from "../runtime"; +import { StreamRegistry } from "../runtime/stream-registry"; +import { registerAgentRoutes } from "../http/routes/agent"; +import { sessionIdFromSse } from "../testing/sse"; +import { EndpointProvidersStore } from "./endpoints"; +import { probeEndpointModels } from "./probe"; +import { resolveProvider } from "."; + +const LIVE = process.env.GRIDA_LIVE_OLLAMA === "1"; +const BASE_URL = + process.env.GRIDA_LIVE_OLLAMA_URL ?? OLLAMA_ENDPOINT_PRESET.base_url; +const TIMEOUT_MS = 300_000; + +const liveDescribe = LIVE ? describe : describe.skip; + +/** The model to test with — env override, else the first installed model. */ +async function detectModelId(): Promise { + if (process.env.GRIDA_LIVE_OLLAMA_MODEL) { + return process.env.GRIDA_LIVE_OLLAMA_MODEL; + } + const origin = new URL(BASE_URL).origin; + const res = await fetch(`${origin}/api/tags`); + const data = (await res.json()) as { models?: Array<{ name: string }> }; + const first = data.models?.[0]?.name; + if (!first) throw new Error("no Ollama models installed — `ollama pull` one"); + return first; +} + +// Concatenate the assistant's streamed text out of a drained SSE body. 
+function assistantTextFromSse(body: string): string { + let text = ""; + for (const frame of body.split("\n\n")) { + for (const line of frame.split("\n")) { + if (!line.startsWith("data:")) continue; + const payload = line.slice("data:".length).trim(); + if (!payload || payload === "[DONE]") continue; + try { + const obj = JSON.parse(payload) as { type?: string; delta?: string }; + if (obj.type === "text-delta" && typeof obj.delta === "string") { + text += obj.delta; + } + } catch { + /* not a JSON UIMessageChunk frame (e.g. the session frame) */ + } + } + } + return text; +} + +type Host = { + app: Hono; + runtime: AgentRuntime; + store: SessionsStore; + workspaces: WorkspaceRegistry; +}; + +function buildHost(baseDir: string): Host { + const auth = new AuthStore(baseDir); + const secrets = new SecretsStore(auth); + const endpoints = new EndpointProvidersStore(baseDir); + const workspaces = new WorkspaceRegistry(baseDir); + const db = openSessionsDb({ user_data_path: baseDir }); + const store = new SessionsStore(db); + const app = new Hono(); + const runtime = new AgentRuntime({ + secrets, + endpoints, + workspace_registry: workspaces, + sessions_store: store, + streams: new StreamRegistry(), + drain_cooldown_ms: 20, + }); + registerAgentRoutes(app, runtime); + return { app, runtime, store, workspaces }; +} + +async function runTurn( + host: Host, + body: Record +): Promise<{ status: number; text: string; session_id: string }> { + const res = await host.app.request("/agent/run", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(body), + }); + const sse = await res.text(); + return { + status: res.status, + text: assistantTextFromSse(sse), + session_id: sessionIdFromSse(sse), + }; +} + +let MODEL_ID = ""; + +liveDescribe("LIVE — Ollama endpoint provider, no key (issue #806)", () => { + let baseDir: string; + let host: Host; + + beforeAll(async () => { + MODEL_ID = await detectModelId(); + console.log(`[live-ollama] 
model=${MODEL_ID} base_url=${BASE_URL}`); + }); + + beforeEach(async () => { + baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "grida-ollama-live-")); + // NO BYOK key is ever set — the whole point. Just the endpoint config. + const endpoints = new EndpointProvidersStore(baseDir); + await endpoints.set({ + ...OLLAMA_ENDPOINT_PRESET, + base_url: BASE_URL, + models: [{ id: MODEL_ID, contextWindow: 32_768, tool_call: true }], + }); + host = buildHost(baseDir); + }); + + afterEach(async () => { + // Conditional: a beforeEach failure leaves `host`/`baseDir` unset — + // teardown must surface the setup error, not mask it by throwing. + (host as Host | undefined)?.runtime.dispose(); + (host as Host | undefined)?.store.close(); + if (baseDir) await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it( + "resolves the endpoint provider with no secret configured", + async () => { + const endpoints = new EndpointProvidersStore(baseDir); + const secrets = new SecretsStore(new AuthStore(baseDir)); + const provider = await resolveProvider({ secrets, endpoints }); + expect(provider.provider_id).toBe("ollama"); + expect(provider.kind).toBe("endpoint"); + }, + TIMEOUT_MS + ); + + it( + "probes the running Ollama and discovers the test model", + async () => { + const result = await probeEndpointModels(BASE_URL); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.source).toBe("ollama"); + const found = result.models.find((m) => m.id === MODEL_ID); + expect(found).toBeDefined(); + // The live model advertises tool support via /api/tags capabilities. + expect(found?.tool_call).toBe(true); + // Context window comes from /api/ps (loaded allocation) or + // /api/show (model max) — either way a real positive number. + expect(found?.contextWindow ?? 
0).toBeGreaterThan(0); + }, + TIMEOUT_MS + ); + + it( + "runs a keyless text turn end-to-end and persists the session", + async () => { + const turn = await runTurn(host, { + messages: [ + { + role: "user", + content: + "Reply with exactly the word GRIDA_OK and nothing else. No punctuation.", + }, + ], + model_id: MODEL_ID, + }); + expect(turn.status).toBe(200); + expect(turn.session_id).toBeTruthy(); + expect(turn.text).toContain("GRIDA_OK"); + + const session = await host.store.get(turn.session_id!); + expect(session?.model?.provider_id).toBe("ollama"); + expect(session?.model?.model_id).toBe(MODEL_ID); + // Usage was recorded off the real stream. + expect(session?.total_tokens ?? 0).toBeGreaterThan(0); + + // The background titler rides the SAME endpoint factory (its `nano` + // tier must land on the local model). Poll for the rename. + let titled = false; + for (let i = 0; i < 60 && !titled; i++) { + await new Promise((r) => setTimeout(r, 1000)); + const row = await host.store.get(turn.session_id!); + titled = row != null && !session_title.isDefault(row.title); + } + expect(titled).toBe(true); + }, + TIMEOUT_MS + ); + + it( + "second turn continues the same session (server-authoritative view)", + async () => { + const first = await runTurn(host, { + messages: [ + { + role: "user", + content: + "My secret code word is ZUMBRA. 
Acknowledge with OK and nothing else.", + }, + ], + model_id: MODEL_ID, + }); + expect(first.status).toBe(200); + const second = await runTurn(host, { + session_id: first.session_id, + messages: [ + { + role: "user", + content: + "Reply with exactly my secret code word from earlier and nothing else.", + }, + ], + model_id: MODEL_ID, + }); + expect(second.status).toBe(200); + expect(second.session_id).toBe(first.session_id); + expect(second.text.toUpperCase()).toContain("ZUMBRA"); + }, + TIMEOUT_MS + ); + + it( + "manual compaction summarizes via the endpoint model (thinking-safe cap)", + async () => { + const first = await runTurn(host, { + messages: [ + { + role: "user", + content: + "We are naming a project. I propose the name AURELIA-9. Acknowledge briefly.", + }, + ], + model_id: MODEL_ID, + }); + expect(first.status).toBe(200); + + const res = await host.runtime.compact(first.session_id); + const result = (await res.json()) as { + compacted: boolean; + reason?: string; + summary_message_id?: string; + }; + // A thinking model with a too-tight output cap returns an EMPTY + // summary (`finish_reason: length` before any text) — `compacted` + // flips false ("summarizer-failed") or persists nothing useful. + expect(result.compacted).toBe(true); + + const messages = await host.store.listVisibleMessages(first.session_id); + const summaryPart = messages + .flatMap((m) => m.parts) + .find((p) => p.type === "data-compaction"); + const summary = ( + summaryPart?.data as { data?: { summary?: string } } | null + )?.data?.summary; + expect(summary ?? 
"").toMatch(/AURELIA-9/i); + }, + TIMEOUT_MS + ); + + it( + "executes a REAL server-side tool call (workspace fs write)", + async () => { + const wsRoot = await fs.mkdtemp( + path.join(os.tmpdir(), "grida-ollama-ws-") + ); + try { + const ws = await host.workspaces.open(wsRoot); + const turn = await runTurn(host, { + workspace_id: ws.id, + model_id: MODEL_ID, + // `auto` so the local run needs no supervised approval round-trip. + mode: "auto", + messages: [ + { + role: "user", + content: + "Use your file tools to create a file named hello.txt at the workspace root containing exactly: hello from ollama — then confirm.", + }, + ], + }); + expect(turn.status).toBe(200); + const written = await fs.readFile( + path.join(ws.root, "hello.txt"), + "utf8" + ); + expect(written.toLowerCase()).toContain("hello from ollama"); + } finally { + await fs.rm(wsRoot, { recursive: true, force: true }); + } + }, + TIMEOUT_MS + ); +}); diff --git a/packages/grida-ai-agent/src/providers/endpoints.test.ts b/packages/grida-ai-agent/src/providers/endpoints.test.ts new file mode 100644 index 000000000..4d8896352 --- /dev/null +++ b/packages/grida-ai-agent/src/providers/endpoints.test.ts @@ -0,0 +1,433 @@ +/** + * Endpoint provider layer (issue #806): config validation, the file- + * backed store, and the `/providers/endpoints/*` + extended `/secrets/*` + * routes. Runs against a tmp-dir store and a bare Hono app — no model, + * no network. 
+ */ +import fs from "node:fs/promises"; +import os from "node:os"; +import path from "node:path"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; +import { Hono } from "hono"; +import { + OLLAMA_ENDPOINT_PRESET, + isValidEndpointProviderId, + mergeProbedModels, + resolveEndpointModel, + validateEndpointProviderConfig, + type EndpointProviderConfig, +} from "../protocol/endpoints"; +import { AuthStore } from "../auth/file"; +import { SecretsStore } from "../secrets"; +import { registerProvidersRoutes } from "../http/routes/providers"; +import { registerSecretsRoutes } from "../http/routes/secrets"; +import { EndpointProvidersStore } from "./endpoints"; + +const OLLAMA: EndpointProviderConfig = { + ...OLLAMA_ENDPOINT_PRESET, + models: [{ id: "llama3.1:8b" }, { id: "qwen3:32b", tool_call: false }], +}; + +describe("validateEndpointProviderConfig", () => { + it("accepts the Ollama preset shape", () => { + const result = validateEndpointProviderConfig(OLLAMA); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.config.id).toBe("ollama"); + expect(result.config.base_url).toBe("http://localhost:11434/v1"); + expect(result.config.models.length).toBe(2); + }); + + it("rejects BYOK-colliding and malformed ids", () => { + expect(isValidEndpointProviderId("openrouter")).toBe(false); + expect(isValidEndpointProviderId("vercel")).toBe(false); + expect(isValidEndpointProviderId("Ollama")).toBe(false); + expect(isValidEndpointProviderId("")).toBe(false); + expect(isValidEndpointProviderId("ollama")).toBe(true); + expect(isValidEndpointProviderId("my-gateway_2")).toBe(true); + }); + + it("rejects non-http(s) base URLs", () => { + for (const base_url of ["file:///etc", "ftp://x", "not a url", "", " "]) { + const result = validateEndpointProviderConfig({ ...OLLAMA, base_url }); + expect(result.ok).toBe(false); + } + }); + + it("trims whitespace padding off base_url before persisting", () => { + const result = 
validateEndpointProviderConfig({ + ...OLLAMA, + base_url: " http://localhost:11434/v1\n", + }); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.config.base_url).toBe("http://localhost:11434/v1"); + }); + + it("rejects duplicate model ids and a dangling default_model_id", () => { + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + models: [{ id: "m" }, { id: "m" }], + }).ok + ).toBe(false); + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + default_model_id: "not-registered", + }).ok + ).toBe(false); + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + default_model_id: "qwen3:32b", + }).ok + ).toBe(true); + }); + + it("drops unknown fields and never accepts a cost card from input", () => { + const result = validateEndpointProviderConfig({ + ...OLLAMA, + models: [{ id: "m", cost: { input: 1, output: 2 }, evil: true }], + }); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.config.models[0]).not.toHaveProperty("cost"); + expect(result.config.models[0]).not.toHaveProperty("evil"); + }); + + it("accepts overrides and resolves them over detected values", () => { + const result = validateEndpointProviderConfig({ + ...OLLAMA, + models: [ + { + id: "m", + tool_call: true, + contextWindow: 262_144, + overrides: { contextWindow: 32_768, junk: true }, + }, + ], + }); + expect(result.ok).toBe(true); + if (!result.ok) return; + const entry = result.config.models[0]; + // Stored shape keeps both halves; unknown override keys are dropped. + expect(entry.contextWindow).toBe(262_144); + expect(entry.overrides).toEqual({ contextWindow: 32_768 }); + // Resolution: override wins, untouched fields fall through. 
+ const resolved = resolveEndpointModel(entry); + expect(resolved.contextWindow).toBe(32_768); + expect(resolved.tool_call).toBe(true); + expect(resolved).not.toHaveProperty("overrides"); + }); + + it("rejects malformed overrides", () => { + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + models: [{ id: "m", overrides: { contextWindow: -5 } }], + }).ok + ).toBe(false); + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + models: [{ id: "m", overrides: "nope" }], + }).ok + ).toBe(false); + }); + + it("rejects out-of-range numeric limits", () => { + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + models: [{ id: "m", contextWindow: -1 }], + }).ok + ).toBe(false); + expect( + validateEndpointProviderConfig({ + ...OLLAMA, + models: [{ id: "m", contextWindow: 1.5 }], + }).ok + ).toBe(false); + }); +}); + +describe("mergeProbedModels — the detection-owned merge contract", () => { + it("probe overwrites detected fields, never overrides; silent probe keeps prior detection", () => { + const result = mergeProbedModels( + [ + { + id: "gemma4:31b-mlx", + tool_call: false, // stale detection + contextWindow: 8_192, + overrides: { contextWindow: 32_768 }, + }, + { id: "unprobed:7b", tool_call: true }, // not in probe result + ], + [ + // tool_call reported, contextWindow silent (older Ollama): the + // silent field keeps the previous detection. 
+ { id: "gemma4:31b-mlx", tool_call: true }, + ] + ); + expect(result.updated).toBe(1); + expect(result.discovered).toBe(0); + expect(result.models[0]).toEqual({ + id: "gemma4:31b-mlx", + tool_call: true, + contextWindow: 8_192, + overrides: { contextWindow: 32_768 }, // untouched, always + }); + expect(result.models[1]).toEqual({ id: "unprobed:7b", tool_call: true }); + }); + + it("appends newly discovered models and reports no-op merges", () => { + const result = mergeProbedModels( + [{ id: "known:8b", tool_call: true }], + [ + { id: "known:8b", tool_call: true }, // unchanged + { id: "new:31b", tool_call: true, contextWindow: 262_144 }, + ] + ); + expect(result.updated).toBe(0); + expect(result.discovered).toBe(1); + expect(result.models.map((m) => m.id)).toEqual(["known:8b", "new:31b"]); + expect(result.models[1].contextWindow).toBe(262_144); + + const noop = mergeProbedModels([{ id: "known:8b", tool_call: true }], []); + expect(noop.updated).toBe(0); + expect(noop.discovered).toBe(0); + }); +}); + +describe("EndpointProvidersStore", () => { + let baseDir: string; + let store: EndpointProvidersStore; + + beforeEach(async () => { + baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "grida-endpoints-")); + store = new EndpointProvidersStore(baseDir); + }); + + afterEach(async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + it("persists round-trip and survives a fresh store instance", async () => { + await store.set(OLLAMA); + const fresh = new EndpointProvidersStore(baseDir); + const list = await fresh.list(); + expect(list.length).toBe(1); + expect(list[0].id).toBe("ollama"); + expect(await fresh.get("ollama")).not.toBeNull(); + expect(await fresh.registeredModels()).toHaveLength(2); + }); + + it("set replaces the entry with the same id", async () => { + await store.set(OLLAMA); + await store.set({ ...OLLAMA, models: [{ id: "only-one" }] }); + expect(await store.registeredModels()).toHaveLength(1); + }); + + it("registeredModels 
applies overrides — every registry consumer sees effective values", async () => { + await store.set({ + ...OLLAMA, + models: [ + { + id: "capped:31b", + contextWindow: 262_144, + overrides: { contextWindow: 32_768 }, + }, + ], + }); + const models = await store.registeredModels(); + expect(models[0].contextWindow).toBe(32_768); + }); + + it("delete is idempotent", async () => { + await store.set(OLLAMA); + await store.delete("ollama"); + await store.delete("ollama"); + expect(await store.list()).toHaveLength(0); + }); + + it("rejects an invalid config thrown at the store layer", async () => { + await expect( + store.set({ ...OLLAMA, base_url: "file:///etc" }) + ).rejects.toThrow(/invalid config/); + }); + + it("concurrent first reads share one load — no empty-cache window", async () => { + await fs.writeFile( + path.join(baseDir, "endpoints.json"), + JSON.stringify([OLLAMA]), + "utf8" + ); + const fresh = new EndpointProvidersStore(baseDir); + const [list, entry, models] = await Promise.all([ + fresh.list(), + fresh.get("ollama"), + fresh.registeredModels(), + ]); + expect(list).toHaveLength(1); + expect(entry).not.toBeNull(); + expect(models).toHaveLength(2); + }); + + it("concurrent writes serialize — neither overwrites the other", async () => { + const other: EndpointProviderConfig = { + id: "litellm", + base_url: "http://localhost:4000/v1", + models: [{ id: "m" }], + }; + await Promise.all([store.set(OLLAMA), store.set(other)]); + expect((await store.list()).map((e) => e.id).sort()).toEqual([ + "litellm", + "ollama", + ]); + // The file agrees — a stale-snapshot persist would have dropped one. 
+ const fresh = new EndpointProvidersStore(baseDir); + expect(await fresh.list()).toHaveLength(2); + }); + + it("drops invalid entries on load instead of failing", async () => { + await fs.writeFile( + path.join(baseDir, "endpoints.json"), + JSON.stringify([OLLAMA, { id: "broken" }, "junk"]), + "utf8" + ); + expect((await store.list()).map((e) => e.id)).toEqual(["ollama"]); + }); +}); + +describe("HTTP wire — /providers/endpoints/* and endpoint-id secrets", () => { + let baseDir: string; + let app: Hono; + let endpoints: EndpointProvidersStore; + let secrets: SecretsStore; + + beforeEach(async () => { + baseDir = await fs.mkdtemp(path.join(os.tmpdir(), "grida-providers-rt-")); + endpoints = new EndpointProvidersStore(baseDir); + secrets = new SecretsStore(new AuthStore(baseDir)); + app = new Hono(); + registerProvidersRoutes(app, { endpoints, secrets }); + registerSecretsRoutes(app, { store: secrets, endpoints }); + }); + + afterEach(async () => { + await fs.rm(baseDir, { recursive: true, force: true }); + }); + + const post = (route: string, body?: unknown) => + app.request(route, { + method: "POST", + headers: { "content-type": "application/json" }, + body: body === undefined ? 
undefined : JSON.stringify(body), + }); + + it("info reports the config file path", async () => { + const res = await post("/providers/endpoints/info"); + expect(res.status).toBe(200); + const { path: configPath } = (await res.json()) as { path: string }; + expect(configPath.endsWith("endpoints.json")).toBe(true); + }); + + it("set → list → delete round-trips", async () => { + const set = await post("/providers/endpoints/set", { config: OLLAMA }); + expect(set.status).toBe(200); + + const list = await post("/providers/endpoints/list"); + expect(list.status).toBe(200); + const configs = (await list.json()) as EndpointProviderConfig[]; + expect(configs.map((c) => c.id)).toEqual(["ollama"]); + + const del = await post("/providers/endpoints/delete", { id: "ollama" }); + expect(del.status).toBe(200); + expect(await (await post("/providers/endpoints/list")).json()).toEqual([]); + }); + + it("probe route returns parsed models, 502s an unreachable endpoint", async () => { + const probeApp = new Hono(); + registerProvidersRoutes(probeApp, { + endpoints, + probe: async (baseUrl: string) => + baseUrl.includes("11434") + ? { + ok: true as const, + source: "ollama" as const, + models: [{ id: "gemma4:31b-mlx", tool_call: true }], + } + : { ok: false as const, error: "no model listing at this endpoint" }, + }); + const probePost = (body: unknown) => + probeApp.request("/providers/endpoints/probe", { + method: "POST", + headers: { "content-type": "application/json" }, + body: JSON.stringify(body), + }); + + const ok = await probePost({ base_url: "http://localhost:11434/v1" }); + expect(ok.status).toBe(200); + expect(await ok.json()).toEqual({ + source: "ollama", + models: [{ id: "gemma4:31b-mlx", tool_call: true }], + }); + + const down = await probePost({ base_url: "http://localhost:9/v1" }); + expect(down.status).toBe(502); + + const bad = await probePost({}); + expect(bad.status).toBe(400); + + // Malformed input is the caller's fault — 400, not a 502 "outage". 
+ const malformed = await probePost({ base_url: "not a url" }); + expect(malformed.status).toBe(400); + const wrongScheme = await probePost({ base_url: "ftp://host/v1" }); + expect(wrongScheme.status).toBe(400); + }); + + it("400s an invalid config with the validator's message", async () => { + const res = await post("/providers/endpoints/set", { + config: { ...OLLAMA, id: "openrouter" }, + }); + expect(res.status).toBe(400); + const body = (await res.json()) as { error: string }; + expect(body.error).toMatch(/id/); + }); + + it("deleting an endpoint deletes its stored key — no orphaned credential", async () => { + await post("/providers/endpoints/set", { config: OLLAMA }); + await post("/secrets/set", { provider_id: "ollama", key: "gateway-key" }); + expect(await secrets.has("ollama")).toBe(true); + + await post("/providers/endpoints/delete", { id: "ollama" }); + // The key went with the endpoint: nothing stale in auth.json, and a + // re-created "ollama" endpoint can't silently reuse the old credential. + expect(await secrets.has("ollama")).toBe(false); + }); + + it("secrets routes accept a configured endpoint id, reject unknown ids", async () => { + // Unknown until configured. + expect( + (await post("/secrets/set", { provider_id: "ollama", key: "k" })).status + ).toBe(400); + + await post("/providers/endpoints/set", { config: OLLAMA }); + + expect( + (await post("/secrets/set", { provider_id: "ollama", key: "k" })).status + ).toBe(200); + const has = await post("/secrets/has", { provider_id: "ollama" }); + expect(((await has.json()) as { has: boolean }).has).toBe(true); + + // BYOK ids still work; junk still 400s. 
+ expect( + (await post("/secrets/set", { provider_id: "openrouter", key: "k" })) + .status + ).toBe(200); + expect((await post("/secrets/has", { provider_id: "nope" })).status).toBe( + 400 + ); + }); +}); diff --git a/packages/grida-ai-agent/src/providers/endpoints.ts b/packages/grida-ai-agent/src/providers/endpoints.ts new file mode 100644 index 000000000..378c2b94f --- /dev/null +++ b/packages/grida-ai-agent/src/providers/endpoints.ts @@ -0,0 +1,163 @@ +/** + * GRIDA-SEC-004 — endpoint provider config store (issue #806). + * + * Persists user-configured OpenAI-compatible endpoints (Ollama, self- + * hosted gateways) at `${userData}/endpoints.json` with the same + * atomic-write pattern as `workspaces.json` / `recent.json`. + * + * Deliberately a SIBLING of `SecretsStore`, not part of it: an endpoint + * config (base URL + registered models) is plain readable config the + * renderer may list back, while secrets are write/presence/delete-only. + * If a gateway needs an API key, that key goes through the secrets + * surface under the endpoint's id — it never lands in this file. + * + * Every load re-validates entries through the protocol validator, so a + * hand-edited or corrupted file degrades to "entry dropped", never to + * an invalid config reaching the provider factory. + */ + +import fs from "node:fs/promises"; +import path from "node:path"; +import { + resolveEndpointModels, + validateEndpointProviderConfig, + type EndpointModelSpec, + type EndpointProviderConfig, +} from "../protocol/endpoints"; +import { isByokProviderId } from "../protocol/provider-ids"; +import { atomicWrite } from "../storage/atomic-write"; + +const FILE_NAME = "endpoints.json"; +const MAX_ENTRIES = 16; + +/** + * THE provider-id namespace gate: BYOK ids ∪ configured endpoint ids. + * Shared by every boundary that accepts a provider id (`/secrets/*` + * allowlist, the run-input `provider_id` gate) — a closed set; anything + * else must 400. 
/**
 * Provider-id namespace gate: an id is known when it is a BYOK provider id or
 * the id of an endpoint currently held by `endpoints`.
 *
 * @param id - Candidate provider id.
 * @param endpoints - Endpoint store to consult; when omitted, only BYOK ids
 *   are known.
 * @returns `true` when the id is in that closed set, `false` otherwise.
 */
export async function isKnownProviderId(
  id: string,
  endpoints?: EndpointProvidersStore
): Promise<boolean> {
  if (isByokProviderId(id)) return true;
  return (await endpoints?.get(id)) != null;
}

/**
 * File-backed store of user-configured endpoint providers.
 *
 * Entries are read once from `FILE_NAME` inside the given user-data directory,
 * re-validated on load, cached in memory, and rewritten through `atomicWrite`
 * on every mutation. Mutations run one at a time, and the in-memory cache is
 * only replaced after the write succeeded, so a failed `set()` / `delete()`
 * leaves the store exactly as it was.
 */
export class EndpointProvidersStore {
  private entries: EndpointProviderConfig[] = [];
  private load_promise: Promise<void> | null = null;
  private write_chain: Promise<void> = Promise.resolve();
  private readonly file_path: string;

  /** @param userDataPath - Directory that holds the backing JSON file. */
  constructor(userDataPath: string) {
    this.file_path = path.join(userDataPath, FILE_NAME);
  }

  /** Absolute path of the backing JSON — surfaced so the settings UI can
   *  point developers at the hand-editable file (overrides live there). */
  get filePath(): string {
    return this.file_path;
  }

  /** One shared load: concurrent first calls await the SAME read instead
   *  of a second caller observing the default empty cache mid-load. */
  private ensureLoaded(): Promise<void> {
    this.load_promise ??= this.loadOnce();
    return this.load_promise;
  }

  /**
   * Reads and validates the backing file. Invalid entries and duplicate ids
   * are dropped, and at most `MAX_ENTRIES` entries are kept. Never rejects: a
   * missing or unparsable file yields an empty store.
   */
  private async loadOnce(): Promise<void> {
    try {
      const raw = await fs.readFile(this.file_path, "utf8");
      const parsed: unknown = JSON.parse(raw);
      // Anything but an array keeps the default empty cache.
      if (!Array.isArray(parsed)) return;

      const valid: EndpointProviderConfig[] = [];
      for (const entry of parsed) {
        const result = validateEndpointProviderConfig(entry);
        if (!result.ok) continue;
        const config = result.config;
        // First occurrence of an id wins.
        if (!valid.some((e) => e.id === config.id)) valid.push(config);
      }
      this.entries = valid.slice(0, MAX_ENTRIES);
    } catch {
      // Missing or corrupt file → empty. Endpoint config is cheap to
      // re-enter; a hand-edit-the-JSON-to-recover UX would be hostile.
      this.entries = [];
    }
  }

  /**
   * Serializes mutations: `fn` starts only after the previously queued
   * mutation settled, so concurrent `set()` / `delete()` calls never compute
   * from a stale snapshot. A rejection is delivered to that caller only; the
   * chain itself continues.
   */
  private withWriteLock<T>(fn: () => Promise<T>): Promise<T> {
    const run = this.write_chain.then(fn);
    this.write_chain = run.then(
      () => undefined,
      () => undefined
    );
    return run;
  }

  /** Writes `entries` to the backing file as pretty-printed JSON. */
  private async persist(
    entries: readonly EndpointProviderConfig[]
  ): Promise<void> {
    await atomicWrite(this.file_path, JSON.stringify(entries, null, 2));
  }

  /** @returns A shallow copy of all configured endpoints. */
  async list(): Promise<EndpointProviderConfig[]> {
    await this.ensureLoaded();
    return [...this.entries];
  }

  /** @returns The endpoint with the given id, or `null` when none matches. */
  async get(id: string): Promise<EndpointProviderConfig | null> {
    await this.ensureLoaded();
    return this.entries.find((e) => e.id === id) ?? null;
  }

  /**
   * Insert or replace the config with the same id. The input is re-validated
   * here so a non-route caller can't persist an invalid entry.
   *
   * @throws Error when validation fails, when the store already holds
   *   `MAX_ENTRIES` other endpoints, or when the write fails. In all three
   *   cases the store is left unchanged.
   */
  async set(config: EndpointProviderConfig): Promise<void> {
    const result = validateEndpointProviderConfig(config);
    if (!result.ok) {
      throw new Error(`[agent-host-endpoints] invalid config: ${result.error}`);
    }
    const validated = result.config;
    await this.withWriteLock(async () => {
      await this.ensureLoaded();
      const next = this.entries.filter((e) => e.id !== validated.id);
      if (next.length >= MAX_ENTRIES) {
        throw new Error(
          `[agent-host-endpoints] too many endpoint providers (max ${MAX_ENTRIES})`
        );
      }
      next.push(validated);
      // Disk first, cache second: a failed write must not leave readers
      // seeing an entry that was never persisted.
      await this.persist(next);
      this.entries = next;
    });
  }

  /**
   * Removes the endpoint with the given id. Deleting an unknown id is a no-op
   * that does not touch the file.
   *
   * @throws Error when the write fails; the entry is then still present.
   */
  async delete(id: string): Promise<void> {
    await this.withWriteLock(async () => {
      await this.ensureLoaded();
      const next = this.entries.filter((e) => e.id !== id);
      if (next.length === this.entries.length) return;
      // Same ordering as `set`: commit to memory only after the write.
      await this.persist(next);
      this.entries = next;
    });
  }

  /**
   * Every model registered across all endpoints, each endpoint passed through
   * `resolveEndpointModels` so consumers get override-resolved values rather
   * than the raw detected fields.
   */
  async registeredModels(): Promise<EndpointModelSpec[]> {
    await this.ensureLoaded();
    return this.entries.flatMap((e) => resolveEndpointModels(e));
  }
}
null, + } as EndpointProvidersStore) + : undefined, }; } +const OLLAMA: EndpointProviderConfig = { + id: "ollama", + label: "Ollama", + base_url: "http://localhost:11434/v1", + models: [{ id: "llama3.1:8b" }, { id: "qwen3:32b" }], +}; + describe("resolveProvider", () => { it("prefers OpenRouter over Vercel when both BYOK keys exist", async () => { const provider = await resolveProvider( @@ -64,3 +82,68 @@ describe("resolveProvider", () => { expect(picked.modelId).toBe("google/gemini-3.5-flash"); }); }); + +describe("resolveProvider — endpoint providers (issue #806)", () => { + it("resolves a configured endpoint with NO key (the no-signup path)", async () => { + const provider = await resolveProvider(deps({}, [OLLAMA])); + expect(provider.provider_id).toBe("ollama"); + expect(provider.kind).toBe("endpoint"); + }); + + it("BYOK keys take precedence over configured endpoints", async () => { + const provider = await resolveProvider( + deps({ openrouter: "sk-or" }, [OLLAMA]) + ); + expect(provider.provider_id).toBe("openrouter"); + }); + + it("an explicit endpoint pick skips BYOK precedence", async () => { + const provider = await resolveProvider( + deps({ openrouter: "sk-or" }, [OLLAMA]), + { explicit: "ollama" } + ); + expect(provider.provider_id).toBe("ollama"); + expect(provider.kind).toBe("endpoint"); + }); + + it("an endpoint with no registered models is not resolvable", async () => { + const empty = { ...OLLAMA, models: [] }; + await expect(resolveProvider(deps({}, [empty]))).rejects.toBeInstanceOf( + ProviderUnavailableError + ); + await expect( + resolveProvider(deps({}, [empty]), { explicit: "ollama" }) + ).rejects.toMatchObject({ provider_id: "ollama" }); + }); + + it("an unknown explicit provider id throws with the picked id", async () => { + await expect( + resolveProvider(deps({}, [OLLAMA]), { explicit: "nope" }) + ).rejects.toMatchObject({ provider_id: "nope" }); + }); + + it("every tier maps to the endpoint's default model; explicit ids pass through", 
async () => { + const provider = await resolveProvider(deps({}, [OLLAMA])); + // No default_model_id configured → models[0]. The titler/compactor + // ask for `nano`; on an endpoint that must land on a served model, + // never the catalog tier id. + for (const tier of ["nano", "mini", "pro", "max"] as const) { + expect( + (provider.model_factory(tier) as { modelId: string }).modelId + ).toBe("llama3.1:8b"); + } + expect( + (provider.model_factory("pro", "qwen3:32b") as { modelId: string }) + .modelId + ).toBe("qwen3:32b"); + }); + + it("honors an explicit default_model_id", async () => { + const provider = await resolveProvider( + deps({}, [{ ...OLLAMA, default_model_id: "qwen3:32b" }]) + ); + expect((provider.model_factory("pro") as { modelId: string }).modelId).toBe( + "qwen3:32b" + ); + }); +}); diff --git a/packages/grida-ai-agent/src/providers/index.ts b/packages/grida-ai-agent/src/providers/index.ts index ade93d5fe..104bf38bc 100644 --- a/packages/grida-ai-agent/src/providers/index.ts +++ b/packages/grida-ai-agent/src/providers/index.ts @@ -1,5 +1,5 @@ /** - * GRIDA-SEC-004 — BYOK provider resolver (in-package providers layer). + * GRIDA-SEC-004 — provider resolver (in-package providers layer). * * Picks the active provider for an agent run and returns a runnable * `ModelFactory`. Resolution is a node-only, in-process concern: it reads @@ -7,9 +7,18 @@ * secrets threat model) and never calls the model itself — it only builds * the factory, so it's cheap on the hot path and easy to test. * - * This is the providers layer, not a generic model-provider router. V1 is - * BYOK-only: OpenRouter takes precedence over Vercel, and a missing - * key throws `ProviderUnavailableError`. + * This is the providers layer, not a generic model-provider router. Two + * provider kinds exist: + * + * - `byok` — the hardcoded third-party slots (OpenRouter, Vercel), + * keyed by a stored secret. 
+ * - `endpoint` — ONE generalized OpenAI-compatible endpoint type + * (issue #806): user-configured `{base_url, models[]}` with an + * OPTIONAL key. Ollama is the preset; a missing key is not an error. + * + * Precedence: BYOK keys first (in metadata order), then configured + * endpoints that have at least one registered model. A configured-but- + * empty endpoint is not resolvable. Explicit picks skip precedence. */ import { TIER_MODEL_IDS, type TierModelId } from "@grida/ai-models"; @@ -18,31 +27,44 @@ import type { ModelTier } from "../tiers"; import type { SecretsStore } from "../secrets"; import { BYOK_PROVIDER_METADATA, + isByokProviderId, type ByokProviderId, } from "../protocol/provider-ids"; -import { makeOpenRouterFactory, makeVercelFactory } from "./byok"; +import { + endpointDefaultModelId, + type EndpointProviderConfig, +} from "../protocol/endpoints"; +import type { EndpointProvidersStore } from "./endpoints"; +import { + makeEndpointFactory, + makeOpenRouterFactory, + makeVercelFactory, +} from "./byok"; + +export { EndpointProvidersStore } from "./endpoints"; /** Canonical tier->catalog-model map. One table, sourced from @grida/ai-models. */ export const MODEL_BY_TIER: Record = TIER_MODEL_IDS; export type ResolvedProvider = { - provider_id: ByokProviderId; - kind: "byok"; + /** A BYOK provider id or a configured endpoint id. */ + provider_id: string; + kind: "byok" | "endpoint"; model_factory: ModelFactory; }; /** * Single error class for both "no provider configured" and "you picked - * provider X but no key is set" paths. The route maps `providerId` being - * present to a 4xx with the picked-id surfaced in the body. + * provider X but it isn't available" paths. The route maps `providerId` + * being present to a 4xx with the picked-id surfaced in the body. */ export class ProviderUnavailableError extends Error { readonly code = "provider_down" as const; constructor(public readonly provider_id?: string) { super( provider_id - ? 
`[agent-host-providers] explicit BYOK provider not available: ${provider_id}` - : "[agent-host-providers] no BYOK provider available" + ? `[agent-host-providers] explicit provider not available: ${provider_id}` + : "[agent-host-providers] no provider available" ); this.name = "ProviderUnavailableError"; } @@ -50,14 +72,17 @@ export class ProviderUnavailableError extends Error { export type ResolveDeps = { secrets: SecretsStore; + /** Endpoint provider configs. Optional so key-only hosts/tests need not + * wire a store; absent ⇒ no endpoint providers resolve. */ + endpoints?: EndpointProvidersStore; }; export type ResolveOptions = { /** * Optional caller override. If set, precedence is skipped and only the - * named BYOK provider is checked. + * named provider (BYOK or endpoint) is checked. */ - explicit?: ByokProviderId; + explicit?: string; }; export async function resolveProvider( @@ -71,7 +96,14 @@ export async function resolveProvider( for (const provider of BYOK_PROVIDER_METADATA) { const key = await deps.secrets._getKey(provider.id); if (key) { - return makeResolvedProvider(provider.id, key); + return makeResolvedByok(provider.id, key); + } + } + + if (deps.endpoints) { + for (const endpoint of await deps.endpoints.list()) { + const resolved = await maybeResolveEndpoint(endpoint, deps); + if (resolved) return resolved; } } @@ -79,15 +111,21 @@ export async function resolveProvider( } async function resolveExplicit( - providerId: ByokProviderId, + providerId: string, deps: ResolveDeps ): Promise { - const key = await deps.secrets._getKey(providerId); - if (!key) throw new ProviderUnavailableError(providerId); - return makeResolvedProvider(providerId, key); + if (isByokProviderId(providerId)) { + const key = await deps.secrets._getKey(providerId); + if (!key) throw new ProviderUnavailableError(providerId); + return makeResolvedByok(providerId, key); + } + const endpoint = await deps.endpoints?.get(providerId); + const resolved = endpoint && (await 
maybeResolveEndpoint(endpoint, deps)); + if (!resolved) throw new ProviderUnavailableError(providerId); + return resolved; } -function makeResolvedProvider( +function makeResolvedByok( providerId: ByokProviderId, key: string ): ResolvedProvider { @@ -108,3 +146,28 @@ function makeResolvedProvider( const _exhaustive: never = providerId; throw new ProviderUnavailableError(_exhaustive); } + +/** + * An endpoint resolves only when it has a model to run (the default + * model: explicit `default_model_id` or the first registered). The key is + * looked up under the endpoint's id and is optional by design — Ollama + * has no key, a self-hosted gateway may. + */ +async function maybeResolveEndpoint( + endpoint: EndpointProviderConfig, + deps: ResolveDeps +): Promise { + const defaultModelId = endpointDefaultModelId(endpoint); + if (!defaultModelId) return null; + const key = await deps.secrets._getKey(endpoint.id); + return { + provider_id: endpoint.id, + kind: "endpoint", + model_factory: makeEndpointFactory({ + id: endpoint.id, + base_url: endpoint.base_url, + api_key: key?.trim() || undefined, + default_model_id: defaultModelId, + }), + }; +} diff --git a/packages/grida-ai-agent/src/providers/probe.test.ts b/packages/grida-ai-agent/src/providers/probe.test.ts new file mode 100644 index 000000000..ed75ac78b --- /dev/null +++ b/packages/grida-ai-agent/src/providers/probe.test.ts @@ -0,0 +1,125 @@ +import { describe, expect, it } from "vitest"; +import { probeEndpointModels, type ProbeFetch } from "./probe"; + +/** Fake fetch keyed by URL; POSTs may key on `url body.model`. 
*/ +function fakeFetch(routes: Record): ProbeFetch { + return async (url, init) => { + let key = url; + if (init.method === "POST" && init.body) { + const model = (JSON.parse(init.body) as { model?: string }).model; + if (model && `${url} ${model}` in routes) key = `${url} ${model}`; + } + if (key in routes) { + return new Response(JSON.stringify(routes[key]), { status: 200 }); + } + return new Response("not found", { status: 404 }); + }; +} + +const BASE = "http://localhost:11434/v1"; + +describe("probeEndpointModels", () => { + it("reads Ollama /api/tags with capability mapping", async () => { + const result = await probeEndpointModels( + BASE, + fakeFetch({ + "http://localhost:11434/api/tags": { + models: [ + { name: "gemma4:31b-mlx", capabilities: ["completion", "tools"] }, + { name: "tinyllama:1b", capabilities: ["completion"] }, + { name: "old-model:7b" }, // older Ollama: no capabilities field + ], + }, + }) + ); + expect(result).toEqual({ + ok: true, + source: "ollama", + models: [ + { id: "gemma4:31b-mlx", tool_call: true }, + { id: "tinyllama:1b", tool_call: false }, + { id: "old-model:7b", tool_call: undefined }, + ], + }); + }); + + it("fills the context window — loaded allocation beats the model max", async () => { + const result = await probeEndpointModels( + BASE, + fakeFetch({ + "http://localhost:11434/api/tags": { + models: [ + { name: "loaded:31b", capabilities: ["tools"] }, + { name: "cold:7b", capabilities: ["tools"] }, + { name: "opaque:1b", capabilities: ["tools"] }, + ], + }, + // `loaded:31b` is running with a capped allocation — /api/ps is + // the server's truth and must win over the /api/show maximum. 
+ "http://localhost:11434/api/ps": { + models: [{ name: "loaded:31b", context_length: 32_768 }], + }, + "http://localhost:11434/api/show loaded:31b": { + model_info: { "gemma4.context_length": 262_144 }, + }, + "http://localhost:11434/api/show cold:7b": { + model_info: { "llama.context_length": 131_072 }, + }, + // `opaque:1b`: /api/show 404s → contextWindow stays unset. + }) + ); + expect(result.ok).toBe(true); + if (!result.ok) return; + const byId = new Map(result.models.map((m) => [m.id, m.contextWindow])); + expect(byId.get("loaded:31b")).toBe(32_768); + expect(byId.get("cold:7b")).toBe(131_072); + expect(byId.get("opaque:1b")).toBeUndefined(); + }); + + it("falls back to the OpenAI /models listing (ids only)", async () => { + const result = await probeEndpointModels( + "http://localhost:4000/v1", + fakeFetch({ + "http://localhost:4000/v1/models": { + object: "list", + data: [{ id: "gpt-proxy-a" }, { id: "gpt-proxy-b" }], + }, + }) + ); + expect(result).toEqual({ + ok: true, + source: "openai", + models: [{ id: "gpt-proxy-a" }, { id: "gpt-proxy-b" }], + }); + }); + + it("reports unreachable endpoints without throwing", async () => { + const result = await probeEndpointModels(BASE, async () => { + throw new Error("ECONNREFUSED"); + }); + expect(result.ok).toBe(false); + if (result.ok) return; + expect(result.error).toMatch(/is the server running/); + }); + + it("rejects non-http(s) and malformed base URLs", async () => { + for (const url of ["file:///etc/passwd", "not a url"]) { + const result = await probeEndpointModels(url, fakeFetch({})); + expect(result.ok).toBe(false); + } + }); + + it("skips malformed rows instead of failing the probe", async () => { + const result = await probeEndpointModels( + BASE, + fakeFetch({ + "http://localhost:11434/api/tags": { + models: [{ name: "good:1b" }, { nope: true }, "junk", { name: "" }], + }, + }) + ); + expect(result.ok).toBe(true); + if (!result.ok) return; + expect(result.models.map((m) => 
m.id)).toEqual(["good:1b"]); + }); +}); diff --git a/packages/grida-ai-agent/src/providers/probe.ts b/packages/grida-ai-agent/src/providers/probe.ts new file mode 100644 index 000000000..b8a713cc2 --- /dev/null +++ b/packages/grida-ai-agent/src/providers/probe.ts @@ -0,0 +1,246 @@ +/** + * GRIDA-SEC-004 — endpoint model probe (issue #806). + * + * Host-side discovery of the models an OpenAI-compatible endpoint + * serves, so the user never has to type model ids by hand. Host-side + * because the packaged renderer cannot reach the endpoint itself: its + * origin is `https://grida.co`, which Ollama's CORS policy rejects — + * only the agent host shares the machine with the endpoint. + * + * Two shapes, tried in order: + * + * 1. **Ollama native** — `GET /api/tags` for ids + capability + * tags (`tool_call` comes back real), enriched with the context + * window: `/api/ps` first (a LOADED model's `context_length` is the + * server's actual allocation — authoritative), then `/api/show` + * `model_info` (the model's maximum) for models not loaded. + * 2. **Generic OpenAI-compatible** — `GET /models` + * (LiteLLM, vLLM, …). Ids only. + * + * Context-window honesty: a server explicitly capped below a model's + * maximum (e.g. `OLLAMA_CONTEXT_LENGTH`) reports the cap via `/api/ps` + * only once the model is loaded — the `/api/show` maximum can overshoot + * such a setup. The field stays user-editable for exactly that case. + * + * Threat note (reviewed): the probe makes the host GET a user-supplied + * URL. This is the SAME egress the run path already performs against a + * configured endpoint (and the writer is the same authenticated loopback + * client), so it widens nothing — but the route must never become a + * generic proxy: responses are parsed and reduced to `{id, tool_call}` + * rows; raw bodies never reach the client. Reads are bounded (timeout + + * size cap) and the URL shape is pinned to http(s). 
import {
  parseEndpointBaseUrl,
  type ProbedEndpointModel,
} from "../protocol/endpoints";

/** Deadline applied to every individual probe request, in milliseconds. */
const PROBE_TIMEOUT_MS = 4_000;
/** Largest response body the probe will read, in bytes. */
const MAX_BODY_BYTES = 1_048_576;
/** Most rows taken from a single model listing. */
const MAX_MODELS = 64;

/** Probe outcome: the reduced model rows and which shape answered, or an
 * error message suitable for showing to the user. */
export type EndpointProbeResult =
  | { ok: true; source: "ollama" | "openai"; models: ProbedEndpointModel[] }
  | { ok: false; error: string };

/** The `fetch` seam — tests inject a fake; production uses the global. */
export type ProbeFetch = (
  url: string,
  init: {
    signal: AbortSignal;
    method?: string;
    headers?: Record<string, string>;
    body?: string;
  }
) => Promise<Response>;

/**
 * Discover the models served at `baseUrl`.
 *
 * Tries the Ollama-native listing (`<origin>/api/tags`, enriched with
 * context windows) and the generic OpenAI listing (`<base>/models`). Both
 * requests are started up front; the Ollama answer is preferred when it
 * parses.
 *
 * @param baseUrl - User-supplied endpoint base URL; validated by
 *   `parseEndpointBaseUrl` before any request is made.
 * @param fetchImpl - Fetch implementation; defaults to the global `fetch`.
 * @returns `{ok: true, source, models}` on the first listing that parses,
 *   the validator's failure when the URL is rejected, or `{ok: false,
 *   error}` naming both URLs tried when neither listing answers.
 */
export async function probeEndpointModels(
  baseUrl: string,
  fetchImpl: ProbeFetch = fetch
): Promise<EndpointProbeResult> {
  const parsed = parseEndpointBaseUrl(baseUrl);
  if (!parsed.ok) return parsed;
  const { url } = parsed;

  // Both shapes probed concurrently (idempotent GETs; a generic gateway
  // shouldn't wait out the full Ollama timeout first). Ollama wins when
  // it answers — capability tags ride along.
  //
  // Trim before stripping trailing slashes: the Ollama URL is built from
  // the normalized `url.origin`, so the OpenAI URL must not carry stray
  // whitespace from the raw input (it would produce `…/v1 /models`).
  // NOTE(review): a no-op if `parseEndpointBaseUrl` already rejects or
  // trims padded input — confirm against the validator.
  const base = baseUrl.trim().replace(/\/+$/, "");
  const ollamaProbe = requestJson(fetchImpl, `${url.origin}/api/tags`);
  const openaiProbe = requestJson(fetchImpl, `${base}/models`);

  const ollama = await ollamaProbe;
  if (ollama.ok) {
    const models = parseOllamaTags(ollama.data);
    if (models) {
      await enrichContextWindows(fetchImpl, url.origin, models);
      return { ok: true, source: "ollama", models };
    }
  }

  // Generic OpenAI-compatible — ids only.
  const openai = await openaiProbe;
  if (openai.ok) {
    const models = parseOpenAiModels(openai.data);
    if (models) return { ok: true, source: "openai", models };
  }

  return {
    ok: false,
    error:
      "no model listing at this endpoint — is the server running? " +
      `(tried ${url.origin}/api/tags and ${base}/models)`,
  };
}

/** Result of one bounded JSON request: the parsed payload, or "no answer". */
type JsonProbe = { ok: true; data: unknown } | { ok: false };
/**
 * One bounded JSON request: GET, or POST when `body` is given. Never
 * throws — every failure mode (timeout, non-2xx, oversize, bad JSON)
 * collapses to `{ok: false}`; the probe treats them all as "no answer".
 *
 * @param fetchImpl - Fetch implementation to issue the request with.
 * @param url - Absolute URL to request.
 * @param body - When given, sent as a JSON POST body; otherwise a GET.
 * @returns `{ok: true, data}` with the parsed JSON, or `{ok: false}`.
 */
async function requestJson(
  fetchImpl: ProbeFetch,
  url: string,
  body?: unknown
): Promise<JsonProbe> {
  try {
    const res = await fetchImpl(url, {
      signal: AbortSignal.timeout(PROBE_TIMEOUT_MS),
      ...(body !== undefined
        ? {
            method: "POST",
            headers: { "content-type": "application/json" },
            body: JSON.stringify(body),
          }
        : {}),
    });
    if (!res.ok) {
      // Release the connection instead of leaving an unread body behind.
      void res.body?.cancel().catch(() => {});
      return { ok: false };
    }
    const text = await readBodyBounded(res);
    if (text === null) return { ok: false };
    return { ok: true, data: JSON.parse(text) };
  } catch {
    return { ok: false };
  }
}

/**
 * Read a response body of at most `maxBytes` bytes. The bound is checked
 * against the declared `content-length` first, then enforced while
 * streaming, so an oversized body is abandoned rather than buffered.
 *
 * @param res - The response whose body is read.
 * @param maxBytes - Byte cap; defaults to {@link MAX_BODY_BYTES}.
 * @returns The decoded text, or `null` when the cap is exceeded.
 */
async function readBodyBounded(
  res: Response,
  maxBytes: number = MAX_BODY_BYTES
): Promise<string | null> {
  // A missing header reads as `Number(null) === 0`; a malformed one as NaN.
  const declared = Number(res.headers.get("content-length"));
  if (Number.isFinite(declared) && declared > maxBytes) {
    void res.body?.cancel().catch(() => {});
    return null;
  }
  if (!res.body) {
    // No stream to meter: measure the ENCODED size. `text.length` counts
    // UTF-16 code units, which under-reports multibyte text against a
    // byte cap.
    const text = await res.text();
    return new TextEncoder().encode(text).byteLength > maxBytes ? null : text;
  }
  const reader = res.body.getReader();
  const chunks: Uint8Array[] = [];
  let total = 0;
  for (;;) {
    const { done, value } = await reader.read();
    if (done) break;
    total += value.byteLength;
    if (total > maxBytes) {
      void reader.cancel().catch(() => {});
      return null;
    }
    chunks.push(value);
  }
  const buf = new Uint8Array(total);
  let offset = 0;
  for (const chunk of chunks) {
    buf.set(chunk, offset);
    offset += chunk.byteLength;
  }
  return new TextDecoder().decode(buf);
}
/**
 * Fill `contextWindow` per model. `/api/ps` first — a loaded model's
 * `context_length` is what the server actually allocated; `/api/show`'s
 * `model_info["<arch>.context_length"]` (the model's maximum) covers the
 * rest. Every miss leaves the field unset. Mutates `models` in place.
 *
 * @param fetchImpl - Fetch implementation used for the lookups.
 * @param origin - Server origin the `/api/*` paths are appended to.
 * @param models - Rows to enrich; `contextWindow` is written on a hit.
 */
async function enrichContextWindows(
  fetchImpl: ProbeFetch,
  origin: string,
  models: ProbedEndpointModel[]
): Promise<void> {
  const ps = await requestJson(fetchImpl, `${origin}/api/ps`);
  const loaded = ps.ok
    ? readLoadedContextLengths(ps.data)
    : new Map<string, number>();
  await Promise.all(
    models.map(async (model) => {
      const allocated = loaded.get(model.id);
      if (allocated !== undefined) {
        model.contextWindow = allocated;
        return;
      }
      const show = await requestJson(fetchImpl, `${origin}/api/show`, {
        model: model.id,
      });
      if (!show.ok) return;
      const maximum = readShowContextLength(show.data);
      if (maximum !== undefined) model.contextWindow = maximum;
    })
  );
}

/**
 * Extract `name → context_length` from an `/api/ps` payload. Malformed
 * rows — including `null` entries, which previously threw — are skipped.
 */
function readLoadedContextLengths(data: unknown): Map<string, number> {
  const loaded = new Map<string, number>();
  const rows = (data as { models?: unknown } | null)?.models;
  if (!Array.isArray(rows)) return loaded;
  for (const row of rows as unknown[]) {
    // Guard BEFORE any property read: untrusted JSON may contain `null`.
    if (typeof row !== "object" || row === null) continue;
    const { name, context_length: length } = row as {
      name?: unknown;
      context_length?: unknown;
    };
    if (typeof name === "string" && isPositiveInt(length)) {
      loaded.set(name, length);
    }
  }
  return loaded;
}

/**
 * Extract the first positive-integer `*.context_length` entry from an
 * `/api/show` payload's `model_info`, or `undefined` when there is none.
 */
function readShowContextLength(data: unknown): number | undefined {
  const info = (data as { model_info?: unknown } | null)?.model_info;
  if (!info || typeof info !== "object") return undefined;
  for (const [key, value] of Object.entries(info)) {
    if (key.endsWith(".context_length") && isPositiveInt(value)) return value;
  }
  return undefined;
}

/** True for finite integers strictly greater than zero. */
function isPositiveInt(value: unknown): value is number {
  return typeof value === "number" && Number.isInteger(value) && value > 0;
}
*/ +function parseOllamaTags(data: unknown): ProbedEndpointModel[] | null { + const models = (data as { models?: unknown } | null)?.models; + if (!Array.isArray(models)) return null; + const out: ProbedEndpointModel[] = []; + for (const m of models.slice(0, MAX_MODELS)) { + const name = (m as { name?: unknown } | null)?.name; + if (typeof name !== "string" || name.length === 0) continue; + const caps = (m as { capabilities?: unknown }).capabilities; + out.push({ + id: name, + // Capabilities reported ⇒ trust them; absent (older Ollama) ⇒ + // unknown, leave undefined so the registry's permissive default + // applies downstream. + tool_call: Array.isArray(caps) ? caps.includes("tools") : undefined, + }); + } + return out; +} + +/** `GET /models` → `{data: [{id}]}` (OpenAI list shape). */ +function parseOpenAiModels(data: unknown): ProbedEndpointModel[] | null { + const rows = (data as { data?: unknown } | null)?.data; + if (!Array.isArray(rows)) return null; + const out: ProbedEndpointModel[] = []; + for (const m of rows.slice(0, MAX_MODELS)) { + const id = (m as { id?: unknown } | null)?.id; + if (typeof id !== "string" || id.length === 0) continue; + out.push({ id }); + } + return out; +} diff --git a/packages/grida-ai-agent/src/runtime/index.ts b/packages/grida-ai-agent/src/runtime/index.ts index 8ef05c90e..445985347 100644 --- a/packages/grida-ai-agent/src/runtime/index.ts +++ b/packages/grida-ai-agent/src/runtime/index.ts @@ -19,7 +19,6 @@ import crypto from "node:crypto"; import { AGENT_SESSION_AGENT } from "../protocol/run"; import { AGENT_DEFAULT_MODE } from "../protocol/mode"; -import type { ByokProviderId } from "../protocol/provider-ids"; import { resolveProvider, ProviderUnavailableError, @@ -28,15 +27,21 @@ import { import { createRecorderConsumer } from "../session/recorder"; import { titler } from "../session/titler"; import type { SessionsStore } from "../session/store"; -import type { MessageUsage } from "../session/rows"; +import type { ChatModel, 
MessageUsage } from "../session/rows"; import { DEFAULT_COMPACTION_CONFIG, compactSession, resolveModelLimits, shouldCompact, type CompactionConfig, + type ResolveModelLimits, } from "../session/compaction"; import type { compactor } from "../session/compactor"; +import { + endpointDefaultModelId, + resolveEndpointModels, + type EndpointProviderConfig, +} from "../protocol/endpoints"; import { discoverSkills } from "../skills/discovery"; import { discoverProjectInstructions } from "../skills/project-instructions"; import type { SkillBodyCache, SkillIndex } from "../skills/types"; @@ -80,7 +85,7 @@ type SessionContext = { async function resolveOrCreateSession( store: SessionsStore, req: RunRequest, - provider: { provider_id: ByokProviderId } + provider: { provider_id: string } ): Promise { if (req.session_id) { const existing = await store.get(req.session_id); @@ -200,6 +205,13 @@ export type AgentRuntimeDeps = ResolveDeps & { /** A provider resolved by {@link resolveProvider} (model factory + ids). */ type ResolvedProvider = Awaited>; +/** One store snapshot powering both compaction limits and the summarizer + * cap — see {@link AgentRuntime.limitsResolver}. */ +type LimitsResolution = { + resolve: ResolveModelLimits; + configs: readonly EndpointProviderConfig[]; +}; + /** * Everything {@link AgentRuntime.startTurn} needs to fire ONE turn, decoupled * from any HTTP request. The HTTP `run()` path and the core queue drain both @@ -469,6 +481,69 @@ export class AgentRuntime { return ctx; } + /** + * Registry-aware model-limits resolution (issue #806): resolves over + * catalog ∪ registered endpoint models, and substitutes an endpoint + * session's missing `model_id` with the endpoint's default model — a + * tier-only Ollama session must NOT fall back to the catalog tier's + * frontier-sized window (1M assumed on an 8k model ⇒ compaction never + * fires ⇒ the session dies on context overflow). 
Carries the loaded + * configs so downstream checks (the summarizer cap) reuse the same + * snapshot instead of re-reading the store. + */ + private async limitsResolver(): Promise { + const endpoints = this.deps.endpoints; + if (!endpoints) { + return { resolve: (model) => resolveModelLimits(model), configs: [] }; + } + const configs = await endpoints.list(); + const custom = configs.flatMap(resolveEndpointModels); + const resolve: ResolveModelLimits = (model) => { + let effective = model; + if (model?.provider_id) { + const endpoint = configs.find((e) => e.id === model.provider_id); + const defaultId = endpoint && endpointDefaultModelId(endpoint); + // Substitute the endpoint default when the session has no model + // id — or a STALE one (saved against a model since removed from + // the config): either way, falling through to the catalog tier + // would assume a frontier-sized window on a local model. "Known" + // is scoped to THIS endpoint's models — another endpoint serving + // the same id must not vouch for it. + const knownOnEndpoint = + !!model.model_id && + !!endpoint?.models.some((m) => m.id === model.model_id); + if (defaultId && !knownOnEndpoint) { + effective = { ...model, model_id: defaultId }; + } + } + return resolveModelLimits(effective, custom); + }; + return { resolve, configs }; + } + + /** + * The summarizer's input cap for a session (issue #806). The compactor + * subagent asks for the `nano` tier, but an endpoint factory maps every + * tier to the endpoint's default model — so when the session runs on a + * configured endpoint, the cap must be that model's window, not the + * catalog nano model's. `undefined` keeps the compaction default. 
+ */ + private summarizerInputCap( + model: ChatModel | null, + limits: LimitsResolution + ): number | undefined { + const providerId = model?.provider_id; + if (!providerId) return undefined; + if (!limits.configs.some((e) => e.id === providerId)) return undefined; + // Limits of the endpoint's DEFAULT model (what `nano` resolves to): + // a model_id-less ChatModel routes through the resolver's default- + // model substitution above. Reserve room for the summary output — + // clamped to the window itself so a sub-5k model never gets handed + // more input than it can hold. + const window = limits.resolve({ provider_id: providerId }).context_window; + return Math.min(window, Math.max(1_024, window - 4_096)); + } + /** * Fire auto-compaction when the session is at/over its usable context * (RFC `session / compaction`). Blocks the turn on the summarizer — by @@ -484,8 +559,11 @@ export class AgentRuntime { if (!this.compaction_enabled) return; const session = await this.deps.sessions_store.get(sessionId); if (!session) return; - const limits = resolveModelLimits(session.model); - if (!shouldCompact(session.total_tokens, limits, this.compaction_config)) { + const limits = await this.limitsResolver(); + const modelLimits = limits.resolve(session.model); + if ( + !shouldCompact(session.total_tokens, modelLimits, this.compaction_config) + ) { return; } try { @@ -494,12 +572,14 @@ export class AgentRuntime { store: this.deps.sessions_store, model_factory: modelFactory, summarize: this.compaction_summarize, + resolve_limits: limits.resolve, }, { session_id: sessionId, auto: true, config: this.compaction_config, signal, + summarizer_input_cap: this.summarizerInputCap(session.model, limits), } ); } catch (err) { @@ -997,13 +1077,20 @@ export class AgentRuntime { } throw err; } + const limits = await this.limitsResolver(); const result = await compactSession( { store: this.deps.sessions_store, model_factory: provider.model_factory, summarize: this.compaction_summarize, + 
resolve_limits: limits.resolve, }, - { session_id: sessionId, auto: false, config: this.compaction_config } + { + session_id: sessionId, + auto: false, + config: this.compaction_config, + summarizer_input_cap: this.summarizerInputCap(session.model, limits), + } ); return Response.json(result); } diff --git a/packages/grida-ai-agent/src/runtime/run-input.test.ts b/packages/grida-ai-agent/src/runtime/run-input.test.ts index e6e5d2469..d9f8f17fc 100644 --- a/packages/grida-ai-agent/src/runtime/run-input.test.ts +++ b/packages/grida-ai-agent/src/runtime/run-input.test.ts @@ -315,3 +315,73 @@ describe("parseRunBody", () => { expect(parsed.approval_answer).toBeUndefined(); }); }); + +describe("parseRunBody — model/provider gates over the open registry (#806)", () => { + const msg = { messages: [{ role: "user", content: "hi" }] }; + const endpoints = { + registeredModels: async () => [{ id: "llama3.1:8b" }], + get: async (id: string) => + id === "ollama" + ? { id: "ollama", base_url: "http://localhost:11434/v1", models: [] } + : null, + }; + const deps = { + workspace_registry: { findById: async () => null }, + endpoints, + }; + const depsWithoutEndpoints = { + workspace_registry: { findById: async () => null }, + }; + + it("accepts a catalog model id", async () => { + const parsed = await parseRunBody( + { ...msg, model_id: "anthropic/claude-opus-4.8" }, + deps as never + ); + expect(parsed).not.toBeInstanceOf(Response); + }); + + it("accepts a registered endpoint model id", async () => { + const parsed = await parseRunBody( + { ...msg, model_id: "llama3.1:8b" }, + deps as never + ); + expect(parsed).not.toBeInstanceOf(Response); + if (parsed instanceof Response) return; + expect(parsed.model_id).toBe("llama3.1:8b"); + }); + + it("still 400s an unknown model id (the gate stays closed)", async () => { + const parsed = await parseRunBody( + { ...msg, model_id: "not-a-model" }, + deps as never + ); + expect(parsed).toBeInstanceOf(Response); + expect(parsed instanceof 
Response ? parsed.status : 0).toBe(400); + }); + + it("400s a registered-looking id when no endpoints store is wired", async () => { + const parsed = await parseRunBody( + { ...msg, model_id: "llama3.1:8b" }, + depsWithoutEndpoints as never + ); + expect(parsed).toBeInstanceOf(Response); + }); + + it("accepts a configured endpoint id as provider_id, rejects unknown", async () => { + const ok = await parseRunBody( + { ...msg, provider_id: "ollama" }, + deps as never + ); + expect(ok).not.toBeInstanceOf(Response); + if (ok instanceof Response) return; + expect(ok.explicit).toBe("ollama"); + + const bad = await parseRunBody( + { ...msg, provider_id: "nope" }, + deps as never + ); + expect(bad).toBeInstanceOf(Response); + expect(bad instanceof Response ? bad.status : 0).toBe(400); + }); +}); diff --git a/packages/grida-ai-agent/src/runtime/run-input.ts b/packages/grida-ai-agent/src/runtime/run-input.ts index 58dadfa4b..0a1bcea40 100644 --- a/packages/grida-ai-agent/src/runtime/run-input.ts +++ b/packages/grida-ai-agent/src/runtime/run-input.ts @@ -20,16 +20,15 @@ import { type AgentMode, } from "../protocol/mode"; import { AGENT_DEFAULT_TIER, AGENT_TIERS, type ModelTier } from "../tiers"; -import { - BYOK_PROVIDER_IDS, - type ByokProviderId, -} from "../protocol/provider-ids"; import type { SessionsStore } from "../session/store"; import type { WorkspaceRegistry } from "../workspaces"; +import { + isKnownProviderId, + type EndpointProvidersStore, +} from "../providers/endpoints"; -const ALLOWED_PROVIDER_IDS = new Set(BYOK_PROVIDER_IDS); const ALLOWED_TIERS = new Set(AGENT_TIERS); -const ALLOWED_MODEL_IDS = new Set(Object.keys(models.text.catalog)); +const CATALOG_MODEL_IDS = new Set(Object.keys(models.text.catalog)); const ALLOWED_ROLES = new Set(["user", "assistant", "system"]); const ALLOWED_SKILL_IDS = new Set(AGENT_SKILL_IDS); @@ -42,9 +41,11 @@ export type NormalizedMessage = { export type RunRequest = { messages: NormalizedMessage[]; tier: ModelTier; - /** 
Explicit catalog model id; overrides the tier→model mapping. */ + /** Explicit model id (catalog or registered); overrides the tier→model + * mapping. */ model_id?: AgentModelId; - explicit?: ByokProviderId; + /** Explicit provider pick: BYOK id or configured endpoint id. */ + explicit?: string; feature?: string; workspace_id?: string; workspace_root?: string; @@ -58,6 +59,9 @@ export type RunRequest = { export type ParseRunBodyDeps = { workspace_registry: WorkspaceRegistry; + /** Endpoint provider configs (issue #806). When present, registered + * model ids and endpoint provider ids join the allowed sets. */ + endpoints?: EndpointProvidersStore; }; export async function parseRunBody( @@ -92,7 +96,14 @@ export async function parseRunBody( : AGENT_DEFAULT_TIER; let modelId: AgentModelId | undefined; if (b.model_id !== undefined) { - if (typeof b.model_id !== "string" || !ALLOWED_MODEL_IDS.has(b.model_id)) { + // Allowed model ids = static catalog ∪ user-registered endpoint + // models (the open-registry seam, issue #806). Still a closed gate: + // an id neither table knows 400s. + const allowed = + typeof b.model_id === "string" && + (CATALOG_MODEL_IDS.has(b.model_id) || + (await isRegisteredModelId(b.model_id, deps))); + if (!allowed) { return Response.json( { error: `modelId not allowed: ${String(b.model_id)}` }, { status: 400 } @@ -100,18 +111,19 @@ export async function parseRunBody( } modelId = b.model_id as AgentModelId; } - let explicit: ByokProviderId | undefined; + let explicit: string | undefined; if (b.provider_id !== undefined) { - if ( - typeof b.provider_id !== "string" || - !ALLOWED_PROVIDER_IDS.has(b.provider_id) - ) { + const providerId = typeof b.provider_id === "string" ? 
b.provider_id : ""; + const allowed = + providerId.length > 0 && + (await isKnownProviderId(providerId, deps.endpoints)); + if (!allowed) { return Response.json( { error: `providerId not allowed: ${String(b.provider_id)}` }, { status: 400 } ); } - explicit = b.provider_id as ByokProviderId; + explicit = providerId; } let workspaceId: string | undefined; let workspaceRoot: string | undefined; @@ -159,6 +171,15 @@ export async function parseRunBody( }; } +async function isRegisteredModelId( + modelId: string, + deps: ParseRunBodyDeps +): Promise { + if (!deps.endpoints) return false; + const registered = await deps.endpoints.registeredModels(); + return registered.some((m) => m.id === modelId); +} + /** * The id of the user message a direct `/agent/run` fires — the LAST * user-role message of the incoming array (the AI SDK client resends the diff --git a/packages/grida-ai-agent/src/session/compaction.test.ts b/packages/grida-ai-agent/src/session/compaction.test.ts index dff63930c..1b3d9d0ba 100644 --- a/packages/grida-ai-agent/src/session/compaction.test.ts +++ b/packages/grida-ai-agent/src/session/compaction.test.ts @@ -98,6 +98,30 @@ describe("threshold helpers", () => { expect(limits.context_window).toBeGreaterThan(0); expect(limits.output_limit).toBeGreaterThan(0); }); + + it("resolveModelLimits resolves a registered local model's real window (#806)", () => { + const custom = [ + { id: "llama3.1:8b", contextWindow: 8_192, outputLimit: 2_048 }, + ]; + const limits = resolveModelLimits( + { provider_id: "ollama", tier: "pro", model_id: "llama3.1:8b" }, + custom + ); + // The pre-registry behavior fell back to the pro tier's frontier + // window (1M) for any unknown id — compaction never fired and the + // session died on overflow. The registry must surface the real 8k. 
+ expect(limits.context_window).toBe(8_192); + expect(limits.output_limit).toBe(2_048); + }); + + it("resolveModelLimits still falls back to tier for unknown ids", () => { + const limits = resolveModelLimits({ + provider_id: "ollama", + tier: "nano", + model_id: "unknown:0b", + }); + expect(limits.context_window).toBeGreaterThan(100_000); + }); }); describe("splitTail", () => { diff --git a/packages/grida-ai-agent/src/session/compaction.ts b/packages/grida-ai-agent/src/session/compaction.ts index 3d8ab0c18..8d65c4690 100644 --- a/packages/grida-ai-agent/src/session/compaction.ts +++ b/packages/grida-ai-agent/src/session/compaction.ts @@ -67,12 +67,27 @@ export type ModelLimits = { output_limit: number; }; -/** Resolve a session's model limits from the catalog. Falls back to the - * default tier when the model can't be resolved. */ -export function resolveModelLimits(model: ChatModel | null): ModelLimits { - let spec = model?.model_id - ? models.text.modelSpecById(model.model_id) - : undefined; +/** A model-limits resolver. The default ({@link resolveModelLimits} with + * no custom list) only knows the static catalog; hosts with registered + * endpoint models inject a registry-aware one (see `AgentRuntime`). */ +export type ResolveModelLimits = (model: ChatModel | null) => ModelLimits; + +/** + * Resolve a session's model limits over catalog ∪ `custom` (the open- + * registry seam, issue #806). Falls back to the default tier when the + * model can't be resolved — note this fallback assumes a frontier-sized + * window, which is why registered local models MUST resolve through + * `custom` rather than land here (an 8k local model treated as 1M never + * compacts and dies on context overflow). + */ +export function resolveModelLimits( + model: ChatModel | null, + custom?: readonly models.text.registry.CustomModelSpec[] +): ModelLimits { + let spec: { contextWindow: number; outputLimit: number } | undefined = + model?.model_id + ? 
models.text.registry.resolve(model.model_id, custom) + : undefined; if (!spec && model?.tier) spec = models.text.byTier[model.tier]; if (!spec) spec = models.text.byTier.pro; return { context_window: spec.contextWindow, output_limit: spec.outputLimit }; @@ -204,6 +219,10 @@ export type CompactionDeps = { model_factory: ModelFactory; /** Injected summarizer (defaults to the real `compactor.summarize`). */ summarize?: compactor.Summarize; + /** Injected model-limits resolver (defaults to the catalog-only + * {@link resolveModelLimits}). Hosts with registered endpoint models + * inject a registry-aware one so local-model windows resolve real. */ + resolve_limits?: ResolveModelLimits; /** Warning sink. Defaults to console.warn. */ on_warn?: (message: string) => void; }; @@ -256,7 +275,7 @@ export async function compactSession( const session = await deps.store.get(opts.session_id); if (!session) return { compacted: false, reason: "session-not-found" }; - const limits = resolveModelLimits(session.model); + const limits = (deps.resolve_limits ?? resolveModelLimits)(session.model); const messages = await deps.store.listVisibleMessages(opts.session_id); diff --git a/packages/grida-ai-agent/src/session/compactor.ts b/packages/grida-ai-agent/src/session/compactor.ts index f28b61a68..93f475852 100644 --- a/packages/grida-ai-agent/src/session/compactor.ts +++ b/packages/grida-ai-agent/src/session/compactor.ts @@ -20,8 +20,12 @@ import type { ModelTier } from "../tiers"; /** Cheapest tier the provider exposes (RFC: `nano` / `small`). */ const COMPACTOR_TIER: ModelTier = "nano"; -const DEFAULT_MAX_OUTPUT_TOKENS = 1024; -const DEFAULT_TIMEOUT_MS = 30_000; +// The cap must cover REASONING + the summary: on a thinking model the +// output budget includes the think stream, and a tight cap truncates +// before the Markdown summary lands. Non-thinking models stop at the +// summary length anyway, so the ceiling is free for them. 
+const DEFAULT_MAX_OUTPUT_TOKENS = 2048; +const DEFAULT_TIMEOUT_MS = 60_000; const SYSTEM_PROMPT = `You compress a long agent/user conversation into a compact, faithful summary so the conversation can continue with less context. diff --git a/packages/grida-ai-agent/src/session/rows.ts b/packages/grida-ai-agent/src/session/rows.ts index 4459509e3..4ae8ad815 100644 --- a/packages/grida-ai-agent/src/session/rows.ts +++ b/packages/grida-ai-agent/src/session/rows.ts @@ -8,13 +8,13 @@ * payloads against them. */ -import type { ByokProviderId } from "../protocol/provider-ids"; +import type { ProviderId } from "../protocol/provider-ids"; import type { AgentModelId } from "../protocol/run"; import type { AgentMode } from "../protocol/mode"; import type { ModelTier } from "../tiers"; export type ChatModel = { - provider_id: ByokProviderId; + provider_id: ProviderId; tier?: ModelTier; model_id?: AgentModelId; }; diff --git a/packages/grida-ai-agent/src/session/titler.ts b/packages/grida-ai-agent/src/session/titler.ts index a911eaf1b..4dfdeca12 100644 --- a/packages/grida-ai-agent/src/session/titler.ts +++ b/packages/grida-ai-agent/src/session/titler.ts @@ -45,7 +45,13 @@ export namespace titler { system: SYSTEM_PROMPT, prompt, temperature: 0.3, - maxOutputTokens: 32, + // The cap must cover REASONING + text: on a thinking model + // (e.g. a local Ollama reasoning model) `completion_tokens` + // includes the think stream, and a tight cap is consumed before + // any title text lands (`finish_reason: length`, empty content). + // 512 leaves thinking headroom; a non-thinking nano stops at + // ~10 tokens anyway, so the ceiling costs nothing. + maxOutputTokens: 512, abortSignal: opts.signal, }); return sanitize(text); @@ -60,7 +66,10 @@ export namespace titler { model_factory: ModelFactory; /** First user message text — caller extracts from the request body. */ user_text: string; - /** Hard timeout for the title gen call. Defaults to 15s. 
*/ + /** Hard timeout for the title gen call. Defaults to 60s — generous + * because the call is fire-and-forget (a ceiling, not a wait): fast + * hosted nanos finish in ~1s, while a local single-flight server + * (Ollama) may queue the titler behind the main turn. */ timeout_ms?: number; }; @@ -71,7 +80,7 @@ export namespace titler { if (!before) return null; if (!session_title.isDefault(before.title)) return null; - const signal = AbortSignal.timeout(opts.timeout_ms ?? 15_000); + const signal = AbortSignal.timeout(opts.timeout_ms ?? 60_000); const title = await generate({ model_factory: opts.model_factory, user_text: opts.user_text, diff --git a/packages/grida-ai-agent/src/transport.ts b/packages/grida-ai-agent/src/transport.ts index 82ceb599f..c06ee85c0 100644 --- a/packages/grida-ai-agent/src/transport.ts +++ b/packages/grida-ai-agent/src/transport.ts @@ -38,6 +38,10 @@ import type { WorkspaceReadFileResult, WorkspaceWriteFileResult, } from "./protocol/resources"; +import type { + EndpointProviderConfig, + ProbedEndpointModel, +} from "./protocol/endpoints"; function base64(value: string): string { const g = globalThis as unknown as { @@ -405,6 +409,34 @@ export namespace AgentTransport { }, } as const; + readonly providers = { + /** Endpoint provider configs (issue #806) — readable plain config, + * unlike secrets. */ + list_endpoints: async (): Promise => + await this.postJson( + "/providers/endpoints/list" + ), + set_endpoint: async (config: EndpointProviderConfig): Promise => { + await this.postJson("/providers/endpoints/set", { config }); + }, + delete_endpoint: async (id: string): Promise => { + await this.postJson("/providers/endpoints/delete", { id }); + }, + /** Where the endpoint config JSON lives on disk. */ + info: async (): Promise<{ path: string }> => + await this.postJson<{ path: string }>("/providers/endpoints/info"), + /** Discover the models an endpoint serves (host-side fetch). 
*/ + probe_endpoint: async ( + baseUrl: string + ): Promise<{ + source: "ollama" | "openai"; + models: ProbedEndpointModel[]; + }> => + await this.postJson("/providers/endpoints/probe", { + base_url: baseUrl, + }), + } as const; + readonly sessions = { list: async (filter: SessionListFilter = {}): Promise => await this.getJson(sessionListPath(filter)), diff --git a/packages/grida-ai-models/README.md b/packages/grida-ai-models/README.md index 1ffc1fa52..5fe72ac40 100644 --- a/packages/grida-ai-models/README.md +++ b/packages/grida-ai-models/README.md @@ -64,6 +64,8 @@ Each `ModelSpec` contains: - `short_label` — optional, manually-curated compact name for space-constrained UI (e.g. `"Opus 4.8"`); falls back to `label` when unset - `multimodal` +- `tool_call` — whether the model supports native tool/function calling + (explicit on every entry; the agent loop is tool-heavy) - `contextWindow` - `outputLimit` - `cost` @@ -74,6 +76,20 @@ For UI that needs the compact name, call `models.text.displayLabel(spec)` — it returns `short_label` when present and `label` otherwise, so call sites never repeat the fallback. +### Open registry (`models.text.registry`) + +`models.text.registry` is the seam for **user-registered models** the static +catalogue does not know — local Ollama models, self-hosted OpenAI-compatible +gateways. A `CustomModelSpec` needs only an `id`; `normalize` fills +conservative defaults (8k context, tool-calling assumed) and +`resolve(id, custom)` looks an id up over catalogue ∪ custom (the catalogue +wins on collision). `cost` is optional on custom specs by design — a local +model is first-class without a price card. 
+ +```ts +const spec = models.text.registry.resolve("llama3.1:8b", customSpecs); +``` + ## Media Models Media model data lives under the `models` namespace: diff --git a/packages/grida-ai-models/__tests__/registry.test.ts b/packages/grida-ai-models/__tests__/registry.test.ts new file mode 100644 index 000000000..08bef512d --- /dev/null +++ b/packages/grida-ai-models/__tests__/registry.test.ts @@ -0,0 +1,87 @@ +import { describe, expect, it } from "vitest"; +import models from "../src"; + +const registry = models.text.registry; + +describe("models.text.registry.normalize", () => { + it("fills defaults for a bare id", () => { + const spec = registry.normalize({ id: "llama3.1:8b" }); + expect(spec).toEqual({ + id: "llama3.1:8b", + label: "llama3.1:8b", + multimodal: false, + tool_call: true, + contextWindow: registry.CUSTOM_MODEL_DEFAULTS.contextWindow, + outputLimit: registry.CUSTOM_MODEL_DEFAULTS.outputLimit, + cost: undefined, + custom: true, + }); + }); + + it("keeps explicit fields, including tool_call: false", () => { + const spec = registry.normalize({ + id: "qwen3:32b", + label: "Qwen 3 32B", + tool_call: false, + contextWindow: 131_072, + outputLimit: 8_192, + multimodal: true, + }); + expect(spec.label).toBe("Qwen 3 32B"); + expect(spec.tool_call).toBe(false); + expect(spec.contextWindow).toBe(131_072); + expect(spec.outputLimit).toBe(8_192); + expect(spec.multimodal).toBe(true); + }); + + it("treats an empty label as absent", () => { + expect(registry.normalize({ id: "m", label: "" }).label).toBe("m"); + }); +}); + +describe("models.text.registry.resolve", () => { + const custom = [ + { id: "llama3.1:8b" }, + { id: "anthropic/claude-sonnet-4.6", label: "shadowed" }, + ]; + + it("resolves a catalogue id with custom: false and cost present", () => { + const spec = registry.resolve("anthropic/claude-opus-4.8", custom); + expect(spec?.custom).toBe(false); + expect(spec?.cost).toBeDefined(); + expect(spec?.tool_call).toBe(true); + }); + + it("resolves a 
registered local id with normalized defaults", () => { + const spec = registry.resolve("llama3.1:8b", custom); + expect(spec?.custom).toBe(true); + expect(spec?.cost).toBeUndefined(); + expect(spec?.contextWindow).toBe( + registry.CUSTOM_MODEL_DEFAULTS.contextWindow + ); + }); + + it("catalogue wins over a colliding custom entry", () => { + const spec = registry.resolve("anthropic/claude-sonnet-4.6", custom); + expect(spec?.custom).toBe(false); + expect(spec?.label).toBe("Claude Sonnet 4.6"); + }); + + it("returns undefined for an unknown id", () => { + expect(registry.resolve("nope:0b", custom)).toBeUndefined(); + expect(registry.resolve("nope:0b")).toBeUndefined(); + }); + + it("does not fuzzy-match custom ids (exact only)", () => { + // Catalogue lookup tolerates bare/date-suffixed ids; custom must not. + expect(registry.resolve("llama3.1", custom)).toBeUndefined(); + }); +}); + +describe("catalogue tool_call flags", () => { + it("every catalogue entry declares tool_call explicitly", () => { + for (const spec of Object.values(models.text.catalog)) { + expect(typeof spec.tool_call).toBe("boolean"); + } + }); +}); diff --git a/packages/grida-ai-models/src/models.ts b/packages/grida-ai-models/src/models.ts index 4fadf219a..1e2662b7d 100644 --- a/packages/grida-ai-models/src/models.ts +++ b/packages/grida-ai-models/src/models.ts @@ -90,6 +90,12 @@ export namespace models { short_label?: string; /** Whether the model accepts image/file inputs. */ multimodal: boolean; + /** + * Whether the model supports native tool/function calling. Explicit + * on every entry — the agent loop is tool-heavy, so this flag gates + * "can this model drive the agent at all" decisions downstream. + */ + tool_call: boolean; /** Maximum context window in tokens (input + output combined). */ contextWindow: number; /** Maximum output tokens per response. 
*/ @@ -108,6 +114,7 @@ export namespace models { id: "openai/gpt-5.4-nano", label: "GPT-5.4 Nano", multimodal: true, + tool_call: true, contextWindow: 400_000, outputLimit: 128_000, cost: { input: 0.2, output: 1.25, cacheRead: 0.02 }, @@ -116,6 +123,7 @@ export namespace models { id: "openai/gpt-5.4-mini", label: "GPT-5.4 Mini", multimodal: true, + tool_call: true, contextWindow: 400_000, outputLimit: 128_000, cost: { input: 0.75, output: 4.5, cacheRead: 0.075 }, @@ -124,6 +132,7 @@ export namespace models { id: "openai/gpt-5.5", label: "GPT-5.5", multimodal: true, + tool_call: true, contextWindow: 1_050_000, outputLimit: 128_000, cost: { input: 5, output: 30, cacheRead: 0.5 }, @@ -132,6 +141,7 @@ export namespace models { id: "openai/gpt-5.5-pro", label: "GPT-5.5 Pro", multimodal: true, + tool_call: true, contextWindow: 1_050_000, outputLimit: 128_000, cost: { input: 30, output: 180 }, @@ -141,6 +151,7 @@ export namespace models { label: "Claude Sonnet 4.6", short_label: "Sonnet 4.6", multimodal: true, + tool_call: true, contextWindow: 1_000_000, outputLimit: 128_000, cost: { input: 3, output: 15, cacheRead: 0.3, cacheWrite: 3.75 }, @@ -150,6 +161,7 @@ export namespace models { label: "Claude Opus 4.8", short_label: "Opus 4.8", multimodal: true, + tool_call: true, contextWindow: 1_000_000, outputLimit: 128_000, cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, @@ -159,6 +171,7 @@ export namespace models { label: "Claude Opus 4.7", short_label: "Opus 4.7", multimodal: true, + tool_call: true, contextWindow: 1_000_000, outputLimit: 128_000, cost: { input: 5, output: 25, cacheRead: 0.5, cacheWrite: 6.25 }, @@ -170,6 +183,7 @@ export namespace models { id: "google/gemini-3.5-flash", label: "Gemini 3.5 Flash", multimodal: true, + tool_call: true, contextWindow: 1_048_576, outputLimit: 65_536, cost: { input: 1.5, output: 9, cacheRead: 0.15 }, @@ -179,6 +193,7 @@ export namespace models { label: "Gemini 3.1 Pro Preview", short_label: "Gemini 3.1 Pro", 
multimodal: true, + tool_call: true, contextWindow: 1_048_576, outputLimit: 65_536, cost: { input: 2, output: 12, cacheRead: 0.2 }, @@ -239,6 +254,103 @@ export namespace models { export function displayLabel(spec: ModelSpec): string { return spec.short_label ?? spec.label; } + + // ── models.text.registry ────────────────────────────────────────── + // + // The open-registry seam (issue #806): spec resolution over the + // static catalogue PLUS caller-supplied user-registered models (local + // Ollama models, self-hosted OpenAI-compatible gateways). Pure data — + // the caller owns where the custom list comes from (agent-host config, + // renderer fetch); this namespace only normalizes and resolves. + + export namespace registry { + /** + * A user-registered text model — a model the static catalogue does + * not know (e.g. `llama3.1:8b` served by a local Ollama). Everything + * but the id is optional; {@link normalize} fills defaults. + * + * `cost` is optional by design: local models are free/unmetered, and + * a registered model must be first-class without a price card. + */ + export interface CustomModelSpec { + /** Provider-side model id, verbatim (e.g. `"llama3.1:8b"`). */ + id: string; + /** Display label. Falls back to the id. */ + label?: string; + /** Whether the model accepts image/file inputs. Default `false`. */ + multimodal?: boolean; + /** + * Whether the model supports native tool/function calling. + * Default `true` (permissive) — consumers warn rather than block + * when this is explicitly `false`. + */ + tool_call?: boolean; + /** Context window in tokens. Default {@link CUSTOM_MODEL_DEFAULTS}. */ + contextWindow?: number; + /** Max output tokens per response. Default {@link CUSTOM_MODEL_DEFAULTS}. */ + outputLimit?: number; + /** Cost per 1M tokens in USD. Absent for local/unmetered models. 
*/ + cost?: ModelCostPerMillion; + } + + /** + * A spec resolved through the open registry: either a catalogue + * {@link ModelSpec} (cost present, `custom: false`) or a normalized + * {@link CustomModelSpec} (cost may be absent, `custom: true`). + */ + export interface ResolvedModelSpec extends Omit { + cost?: ModelCostPerMillion; + /** True when the spec came from the caller's custom list. */ + custom: boolean; + } + + /** + * Defaults applied to a {@link CustomModelSpec} by {@link normalize}. + * + * The context window is deliberately conservative: overflowing a + * local model's real window kills the session mid-run, while a too- + * small assumption merely compacts early. 8k matches the common + * Ollama serving default; users with larger windows raise it in the + * model's config. + */ + export const CUSTOM_MODEL_DEFAULTS = { + multimodal: false, + tool_call: true, + contextWindow: 8_192, + outputLimit: 4_096, + } as const; + + /** Fill a custom spec's gaps with {@link CUSTOM_MODEL_DEFAULTS}. */ + export function normalize(spec: CustomModelSpec): ResolvedModelSpec { + return { + id: spec.id, + label: spec.label && spec.label.length > 0 ? spec.label : spec.id, + multimodal: spec.multimodal ?? CUSTOM_MODEL_DEFAULTS.multimodal, + tool_call: spec.tool_call ?? CUSTOM_MODEL_DEFAULTS.tool_call, + contextWindow: + spec.contextWindow ?? CUSTOM_MODEL_DEFAULTS.contextWindow, + outputLimit: spec.outputLimit ?? CUSTOM_MODEL_DEFAULTS.outputLimit, + cost: spec.cost, + custom: true, + }; + } + + /** + * Resolve a model id over catalogue ∪ custom. The catalogue wins on + * a collision (it carries curated labels + real pricing); custom ids + * match exactly — local ids like `llama3.1:8b` have no namespacing + * convention to fuzzy-match on. 
+ */ + export function resolve( + modelId: string, + custom?: readonly CustomModelSpec[] + ): ResolvedModelSpec | undefined { + const fromCatalog = modelSpecById(modelId); + if (fromCatalog) return { ...fromCatalog, custom: false }; + const fromCustom = custom?.find((m) => m.id === modelId); + return fromCustom ? normalize(fromCustom) : undefined; + } + } } // ── models.image ────────────────────────────────────────────────── diff --git a/packages/grida-desktop-bridge/src/index.ts b/packages/grida-desktop-bridge/src/index.ts index aeae7ed10..d9c142965 100644 --- a/packages/grida-desktop-bridge/src/index.ts +++ b/packages/grida-desktop-bridge/src/index.ts @@ -11,7 +11,9 @@ import type { AgentRunOptions, AgentServerHandshakeResponse, AgentUIMessageChunk, - ByokProviderId, + ProviderId, + EndpointProviderConfig, + ProbedEndpointModel, ChatMessageWithParts, ChatSessionRow, CreateSessionOptions, @@ -224,9 +226,29 @@ export type DesktopBridge = { }) => Promise; }; secrets: { - has: (providerId: ByokProviderId) => Promise; - set: (providerId: ByokProviderId, key: string) => Promise; - delete: (providerId: ByokProviderId) => Promise; + has: (providerId: ProviderId) => Promise; + set: (providerId: ProviderId, key: string) => Promise; + delete: (providerId: ProviderId) => Promise; + }; + /** + * Endpoint provider config (issue #806) — user-configured OpenAI- + * compatible endpoints (Ollama preset, self-hosted gateways). Plain + * readable config, unlike `secrets`: list returns full configs. + * Optional — older desktop binaries don't carry it; renderers must + * feature-detect and hide the surface when absent. + */ + providers?: { + list_endpoints: () => Promise; + set_endpoint: (config: EndpointProviderConfig) => Promise; + delete_endpoint: (id: string) => Promise; + /** Where the endpoint config JSON lives (the hand-editable file). 
*/ + info: () => Promise<{ path: string }>; + /** Discover the models an endpoint serves (agent-host-side fetch — + * the renderer's origin can't reach a local Ollama directly). */ + probe_endpoint: (baseUrl: string) => Promise<{ + source: "ollama" | "openai"; + models: ProbedEndpointModel[]; + }>; }; agent: { run: (