From 42004cc1518aafe3105afd6058d63b701eb7ea30 Mon Sep 17 00:00:00 2001 From: Brand Date: Thu, 11 Jun 2026 15:35:50 -0600 Subject: [PATCH 1/4] =?UTF-8?q?test(ollama):=20RED=20=E2=80=94=20failing?= =?UTF-8?q?=20tests=20for=20Settings.backend=20+=20selector=20+=20bootstra?= =?UTF-8?q?p=20wiring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TDD red-pass for v0.1.1 Ollama daemon wiring (Track 1, item 1 per AGENT_OPERATIONS.md). Tests reference symbols that don't yet exist; CI will fail at compile, which IS the red. The green commit follows. selector.rs: - BackendKind::Ollama (new variant) + as_str() = "Ollama" - DEFAULT_OLLAMA_BASE_URL = "http://127.0.0.1:11434" - make_backend_for_kind(kind, inference_url, ollama_url, ollama_model) — settings-driven constructor; existing make_backend(base_url) preserved - existing detect / make_backend tests updated for the third variant (auto-detect still never picks Ollama — opt-in only) settings.rs: - Backend enum (Auto/LlamaCpp/Vllm/Ollama), serde lowercase - new fields: backend (default Auto), ollama_base_url, ollama_model - back-compat: legacy settings.toml without `backend` parses to Auto - TOML round-trip for every Backend variant bootstrap.rs: - build_app_state honors Settings.backend: · Auto → existing platform detect (LlamaCpp/Vllm only) · Ollama → Ollama backend wired through (the headline assertion) · Explicit LlamaCpp/Vllm → that backend regardless of platform - Ollama with unset URL/model falls back to daemon defaults API server passes the configured model through implicitly: AppState.backend is Arc<dyn InferenceBackend>; the Ollama adapter already takes (base_url, model) in its constructor, so wiring it via bootstrap is the only change needed at the API layer. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/ocm-daemon/src/bootstrap.rs | 59 ++++++++++++++++++++- crates/ocm-daemon/src/settings.rs | 78 ++++++++++++++++++++++++++++ crates/ocm-inference/src/selector.rs | 46 ++++++++++++++++ 3 files changed, 182 insertions(+), 1 deletion(-) diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs index 9591eae..5703ee9 100644 --- a/crates/ocm-daemon/src/bootstrap.rs +++ b/crates/ocm-daemon/src/bootstrap.rs @@ -132,7 +132,7 @@ pub async fn bootstrap(settings: Settings) { #[cfg(test)] mod tests { use super::*; - use crate::settings::Theme; + use crate::settings::{Backend, Theme}; fn test_settings() -> Settings { Settings { @@ -143,6 +143,9 @@ mod tests { inference_base_url: Some("http://127.0.0.1:18080".into()), mem0_base_url: Some("http://127.0.0.1:18765".into()), retrieval_top_k: Some(3), + backend: Backend::Auto, + ollama_base_url: None, + ollama_model: None, } } @@ -154,6 +157,7 @@ mod tests { let state = build_app_state(&s); assert_eq!(state.retrieval_top_k, DEFAULT_RETRIEVAL_TOP_K); // backend / memory clients are constructed; concrete name depends on platform + // (Auto never picks Ollama — it's opt-in). let backend_name = state.backend.name(); assert!(backend_name == "llama.cpp" || backend_name == "vLLM"); } @@ -165,6 +169,59 @@ mod tests { assert_eq!(state.retrieval_top_k, 3); } + #[test] + fn explicit_ollama_backend_is_wired_through_to_app_state() { + // The headline v0.1.1 wiring assertion: a user who selects backend = + // "ollama" in settings ends up with an Ollama InferenceBackend on the + // live AppState. Verified by the trait's `name()` ("Ollama" — see + // ocm_inference::ollama::Ollama::name). 
+ let s = Settings { + backend: Backend::Ollama, + ollama_base_url: Some("http://127.0.0.1:11434".into()), + ollama_model: Some("llama3".into()), + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "Ollama"); + } + + #[test] + fn explicit_ollama_uses_defaults_when_fields_unset() { + // backend = "ollama" with no URL/model still produces a constructible + // Ollama backend — bootstrap fills in the daemon's native defaults + // (port 11434, the existing ollama::DEFAULT_MODEL). + let s = Settings { + backend: Backend::Ollama, + ollama_base_url: None, + ollama_model: None, + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "Ollama"); + } + + #[test] + fn explicit_llamacpp_overrides_platform_detect() { + // Users on a CUDA box who explicitly pick llama.cpp must get llama.cpp, + // even if auto-detect would have picked vLLM. + let s = Settings { + backend: Backend::LlamaCpp, + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "llama.cpp"); + } + + #[test] + fn explicit_vllm_overrides_platform_detect() { + let s = Settings { + backend: Backend::Vllm, + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "vLLM"); + } + #[tokio::test] async fn probe_url_returns_false_for_unreachable() { // Using port 1 (privileged, almost guaranteed not bound) on localhost diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs index f4779a8..1d2c902 100644 --- a/crates/ocm-daemon/src/settings.rs +++ b/crates/ocm-daemon/src/settings.rs @@ -83,4 +83,82 @@ mod tests { let s = Settings::load_or_default(&path).unwrap(); assert_eq!(s, Settings::default()); } + + #[test] + fn default_backend_is_auto() { + // Auto preserves the platform-detect behavior that shipped before this + // field existed; explicit selection (LlamaCpp / Vllm / Ollama) is opt-in. 
+ let s = Settings::default(); + assert_eq!(s.backend, Backend::Auto); + assert_eq!(s.ollama_base_url, None); + assert_eq!(s.ollama_model, None); + } + + #[test] + fn backend_serializes_lowercase() { + // TOML keys are lowercase by convention; matches Theme's serde shape. + let raw = toml::to_string(&Settings { + backend: Backend::Ollama, + ..Settings::default() + }) + .unwrap(); + assert!(raw.contains("backend = \"ollama\"")); + } + + #[test] + fn ollama_settings_round_trip_via_toml() { + let dir = tempdir().unwrap(); + let path = dir.path().join("settings.toml"); + let original = Settings { + backend: Backend::Ollama, + ollama_base_url: Some("http://127.0.0.1:11434".into()), + ollama_model: Some("llama3".into()), + ..Settings::default() + }; + original.save(&path).unwrap(); + let loaded = Settings::load_or_default(&path).unwrap(); + assert_eq!(loaded, original); + assert_eq!(loaded.backend, Backend::Ollama); + assert_eq!( + loaded.ollama_base_url.as_deref(), + Some("http://127.0.0.1:11434") + ); + assert_eq!(loaded.ollama_model.as_deref(), Some("llama3")); + } + + #[test] + fn legacy_settings_toml_without_backend_field_still_parses() { + // Forward-compat: users with a settings.toml written before v0.1.1 + // (no `backend` key) must still load — the new field defaults to Auto. 
+ let dir = tempdir().unwrap(); + let path = dir.path().join("settings.toml"); + let legacy = r#" +api_port = 7300 +mcp_enabled = true +theme = "system" +"#; + std::fs::write(&path, legacy).unwrap(); + let loaded = Settings::load_or_default(&path).unwrap(); + assert_eq!(loaded.backend, Backend::Auto); + assert_eq!(loaded.ollama_base_url, None); + assert_eq!(loaded.ollama_model, None); + } + + #[test] + fn all_backend_variants_round_trip() { + for kind in [ + Backend::Auto, + Backend::LlamaCpp, + Backend::Vllm, + Backend::Ollama, + ] { + let s = Settings { + backend: kind, + ..Settings::default() + }; + let raw = toml::to_string(&s).unwrap(); + let back: Settings = toml::from_str(&raw).unwrap(); + assert_eq!(back.backend, kind, "round-trip failed for {kind:?}"); + } + } } diff --git a/crates/ocm-inference/src/selector.rs b/crates/ocm-inference/src/selector.rs index a11f023..b353d3f 100644 --- a/crates/ocm-inference/src/selector.rs +++ b/crates/ocm-inference/src/selector.rs @@ -60,6 +60,8 @@ mod tests { #[test] fn detect_returns_one_of_two_kinds() { + // `detect_backend_kind()` is the AUTO-detect path; it never picks Ollama + // (Ollama is opt-in via Settings.backend, not platform-default). let kind = detect_backend_kind(); assert!(matches!(kind, BackendKind::LlamaCpp | BackendKind::Vllm)); } @@ -93,6 +95,7 @@ mod tests { let expected = match detect_backend_kind() { BackendKind::LlamaCpp => "llama.cpp", BackendKind::Vllm => "vLLM", + BackendKind::Ollama => "Ollama", }; assert_eq!(backend.name(), expected); } @@ -101,5 +104,48 @@ mod tests { fn backend_kind_as_str_matches_name() { assert_eq!(BackendKind::LlamaCpp.as_str(), "llama.cpp"); assert_eq!(BackendKind::Vllm.as_str(), "vLLM"); + // The Ollama adapter's InferenceBackend::name() returns "Ollama" — keep + // as_str() in lockstep so log/telemetry never disagree on labels. 
+ assert_eq!(BackendKind::Ollama.as_str(), "Ollama"); + } + + #[test] + fn default_ollama_base_url_is_native_daemon_port() { + // 11434 is the Ollama daemon's installed default; if this ever changes + // upstream we want the test to force us to revisit the constant. + assert_eq!(DEFAULT_OLLAMA_BASE_URL, "http://127.0.0.1:11434"); + } + + #[test] + fn make_backend_for_kind_ollama_constructs_ollama_backend() { + let backend = make_backend_for_kind( + BackendKind::Ollama, + "http://127.0.0.1:8080".to_string(), + "http://127.0.0.1:11434".to_string(), + "llama3".to_string(), + ); + assert_eq!(backend.name(), "Ollama"); + } + + #[test] + fn make_backend_for_kind_llamacpp_ignores_ollama_args() { + let backend = make_backend_for_kind( + BackendKind::LlamaCpp, + "http://127.0.0.1:8080".to_string(), + "http://127.0.0.1:11434".to_string(), + "llama3".to_string(), + ); + assert_eq!(backend.name(), "llama.cpp"); + } + + #[test] + fn make_backend_for_kind_vllm_ignores_ollama_args() { + let backend = make_backend_for_kind( + BackendKind::Vllm, + "http://127.0.0.1:8000".to_string(), + "http://127.0.0.1:11434".to_string(), + "llama3".to_string(), + ); + assert_eq!(backend.name(), "vLLM"); } } From cb61171af7e7e414ab69011d183819bc827a9659 Mon Sep 17 00:00:00 2001 From: Brand Date: Thu, 11 Jun 2026 15:38:37 -0600 Subject: [PATCH 2/4] =?UTF-8?q?feat(ollama):=20GREEN=20=E2=80=94=20wire=20?= =?UTF-8?q?Settings.backend=20through=20to=20AppState?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Minimal implementation to turn the RED commit green. The Ollama adapter already exists (crates/ocm-inference/src/ollama.rs); this commit makes it constructible from settings. 
selector.rs: - BackendKind::Ollama variant; as_str() = "Ollama" (matches Ollama::name) - DEFAULT_OLLAMA_BASE_URL = "http://127.0.0.1:11434" (Ollama daemon default) - make_backend_for_kind(kind, inference_url, ollama_url, ollama_model): settings-driven constructor; keeps make_backend(base_url) for back-compat - detect_backend_kind never returns Ollama (opt-in only — auto-detect picks between backends OCM can supervise itself) settings.rs: - Backend enum (Auto/LlamaCpp/Vllm/Ollama), serde lowercase, default Auto - new fields: backend, ollama_base_url, ollama_model - all new fields #[serde(default)] for back-compat with old settings.toml bootstrap.rs: - resolve_backend_kind(Settings.backend) → BackendKind (Auto delegates to detect_backend_kind; explicit selections override) - build_app_state resolves Ollama URL/model to constants when unset and calls make_backend_for_kind — Ollama backend now flows through to AppState.backend, satisfying the "API server passes the configured model through" requirement (AppState.backend: Arc<dyn InferenceBackend> is the abstraction boundary) Frontend type + UI changes follow in the next commit (no Rust impact). Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/ocm-daemon/src/bootstrap.rs | 28 ++++++++++++++++--- crates/ocm-daemon/src/settings.rs | 36 +++++++++++++++++++++++++ crates/ocm-inference/src/selector.rs | 40 +++++++++++++++++++++++++++- 3 files changed, 100 insertions(+), 4 deletions(-) diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs index 5703ee9..88727b5 100644 --- a/crates/ocm-daemon/src/bootstrap.rs +++ b/crates/ocm-daemon/src/bootstrap.rs @@ -7,8 +7,9 @@ //! functionality and logs warnings. The user gets a Tauri tray + window //! that reports status; chat requests fail with clear errors. 
-use crate::settings::Settings; -use ocm_inference::selector; +use crate::settings::{Backend, Settings}; +use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL; +use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL}; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::sync::Arc; use std::time::Duration; @@ -60,6 +61,17 @@ async fn probe_url(base: &str, path: &str) -> bool { matches!(client.get(&url).send().await, Ok(r) if r.status().is_success()) } +/// Resolve `Settings.backend` to a concrete `BackendKind`. `Auto` delegates to +/// the existing platform detect; explicit settings win over detection. +fn resolve_backend_kind(setting: Backend) -> BackendKind { + match setting { + Backend::Auto => selector::detect_backend_kind(), + Backend::LlamaCpp => BackendKind::LlamaCpp, + Backend::Vllm => BackendKind::Vllm, + Backend::Ollama => BackendKind::Ollama, + } +} + /// Construct the full AppState given settings. pub fn build_app_state(settings: &Settings) -> ocm_api::AppState { let inference_url = settings @@ -70,8 +82,18 @@ pub fn build_app_state(settings: &Settings) -> ocm_api::AppState { .mem0_base_url .clone() .unwrap_or_else(|| DEFAULT_MEM0_BASE_URL.to_string()); + let ollama_url = settings + .ollama_base_url + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_BASE_URL.to_string()); + let ollama_model = settings + .ollama_model + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string()); - let backend = selector::make_backend(inference_url); + let kind = resolve_backend_kind(settings.backend); + info!(backend = kind.as_str(), "selected inference backend"); + let backend = selector::make_backend_for_kind(kind, inference_url, ollama_url, ollama_model); let memory = Arc::new(ocm_memory::Mem0Client::new(memory_url, "ocm-default")); let backend: Arc = Arc::from(backend); diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs index 1d2c902..9325906 100644 --- a/crates/ocm-daemon/src/settings.rs +++ 
b/crates/ocm-daemon/src/settings.rs @@ -18,6 +18,22 @@ pub struct Settings { /// Number of memories to retrieve per chat turn. Default 5. Set to 0 to disable. #[serde(default)] pub retrieval_top_k: Option, + /// Which inference backend to use. `Auto` (default) preserves pre-v0.1.1 + /// behavior: platform-detect picks llama.cpp on Mac/Windows/CPU-Linux and + /// vLLM on CUDA Linux. Explicit `LlamaCpp` / `Vllm` / `Ollama` override + /// detection — the Ollama branch is the "I have an Ollama daemon already, + /// point OCM at it" zero-extra-process path. + #[serde(default)] + pub backend: Backend, + /// Override the Ollama daemon URL. Only consulted when `backend = "ollama"`. + /// Default (when unset) is `http://127.0.0.1:11434` — Ollama's installed default. + #[serde(default)] + pub ollama_base_url: Option, + /// Ollama model tag (e.g. `llama3`, `qwen2.5:7b`). REQUIRED by the Ollama + /// native API — there is no server-side default. When unset, bootstrap + /// falls back to `ocm_inference::ollama::DEFAULT_MODEL`. + #[serde(default)] + pub ollama_model: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Copy)] @@ -28,6 +44,23 @@ pub enum Theme { System, } +/// Inference backend selection. Wire-format is lowercase TOML (`backend = "ollama"`). +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy)] +#[serde(rename_all = "lowercase")] +pub enum Backend { + /// Platform-detect (existing pre-v0.1.1 behavior). 
+ Auto, + LlamaCpp, + Vllm, + Ollama, +} + +impl Default for Backend { + fn default() -> Self { + Backend::Auto + } +} + impl Default for Settings { fn default() -> Self { Self { @@ -38,6 +71,9 @@ impl Default for Settings { inference_base_url: None, mem0_base_url: None, retrieval_top_k: None, + backend: Backend::Auto, + ollama_base_url: None, + ollama_model: None, } } } diff --git a/crates/ocm-inference/src/selector.rs b/crates/ocm-inference/src/selector.rs index b353d3f..5a3819f 100644 --- a/crates/ocm-inference/src/selector.rs +++ b/crates/ocm-inference/src/selector.rs @@ -6,13 +6,19 @@ //! - Linux without CUDA -> llama.cpp (CPU) //! - Windows -> llama.cpp //! - everything else -> llama.cpp (safest fallback) +//! +//! Ollama is **opt-in** (Settings.backend = "ollama"); it is never returned +//! from `detect_backend_kind()`. Auto-detect picks between the two backends +//! OCM can supervise itself; Ollama bridges to an *external* daemon and is +//! a deliberate user choice. -use crate::{llamacpp::LlamaCpp, vllm::Vllm, InferenceBackend}; +use crate::{llamacpp::LlamaCpp, ollama::Ollama, vllm::Vllm, InferenceBackend}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BackendKind { LlamaCpp, Vllm, + Ollama, } impl BackendKind { @@ -20,10 +26,16 @@ impl BackendKind { match self { BackendKind::LlamaCpp => "llama.cpp", BackendKind::Vllm => "vLLM", + // Kept in lockstep with Ollama::name() so log/telemetry labels match. + BackendKind::Ollama => "Ollama", } } } +/// Native Ollama daemon default — the daemon binds 127.0.0.1:11434 out of the +/// box. Bootstrap falls back to this when Settings.ollama_base_url is unset. +pub const DEFAULT_OLLAMA_BASE_URL: &str = "http://127.0.0.1:11434"; + pub fn detect_backend_kind() -> BackendKind { if cfg!(target_os = "macos") { BackendKind::LlamaCpp @@ -47,10 +59,36 @@ fn has_cuda() -> bool { false } +/// Auto-detect-only constructor preserved for back-compat. 
Settings-driven +/// callers (the daemon's bootstrap) should use `make_backend_for_kind`. pub fn make_backend(base_url: String) -> Box { match detect_backend_kind() { BackendKind::Vllm => Box::new(Vllm::new(base_url)), BackendKind::LlamaCpp => Box::new(LlamaCpp::new(base_url)), + // detect_backend_kind never returns Ollama, but the match must be + // exhaustive — fall through to the safest local default. + BackendKind::Ollama => Box::new(LlamaCpp::new(base_url)), + } +} + +/// Settings-driven constructor: pick the backend by explicit `BackendKind`, +/// using each backend's own URL/model where applicable. +/// +/// The `inference_url` argument feeds llama.cpp / vLLM (they share the +/// OpenAI-compat HTTP wire format); `ollama_url` + `ollama_model` feed the +/// Ollama adapter. The two URLs are separate because a user can have an +/// Ollama daemon AND llama-server running on the same machine on different +/// ports; we don't want either's config to be shadowed by the other's. +pub fn make_backend_for_kind( + kind: BackendKind, + inference_url: String, + ollama_url: String, + ollama_model: String, +) -> Box { + match kind { + BackendKind::LlamaCpp => Box::new(LlamaCpp::new(inference_url)), + BackendKind::Vllm => Box::new(Vllm::new(inference_url)), + BackendKind::Ollama => Box::new(Ollama::new(ollama_url, ollama_model)), } } From 6963c3f55dd772123668b21bdd01357cbd8d8213 Mon Sep 17 00:00:00 2001 From: Brand Date: Thu, 11 Jun 2026 15:41:31 -0600 Subject: [PATCH 3/4] fix(settings): use #[derive(Default)] for Backend (clippy::derivable_impls) CI's `clippy -D warnings` rejected the manual `impl Default for Backend`. Switch to the derive-with-#[default] form (same observable behavior: Backend::default() = Backend::Auto). Tests unchanged. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/ocm-daemon/src/settings.rs | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs index 9325906..374fcdd 100644 --- a/crates/ocm-daemon/src/settings.rs +++ b/crates/ocm-daemon/src/settings.rs @@ -45,22 +45,17 @@ pub enum Theme { } /// Inference backend selection. Wire-format is lowercase TOML (`backend = "ollama"`). -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy)] +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, Default)] #[serde(rename_all = "lowercase")] pub enum Backend { /// Platform-detect (existing pre-v0.1.1 behavior). + #[default] Auto, LlamaCpp, Vllm, Ollama, } -impl Default for Backend { - fn default() -> Self { - Backend::Auto - } -} - impl Default for Settings { fn default() -> Self { Self { From d8b126cca26a78ad6c5a0c006384e64954d84e9d Mon Sep 17 00:00:00 2001 From: Brand Date: Thu, 11 Jun 2026 15:42:21 -0600 Subject: [PATCH 4/4] feat(frontend): settings UI for backend + Ollama URL + Ollama model Mirrors the new Settings fields on the Rust side (crates/ocm-daemon/src/ settings.rs): backend, ollama_base_url, ollama_model. settings.ts: - Backend type ('auto' | 'llamacpp' | 'vllm' | 'ollama'), matches Rust serde - Three new fields on the Settings interface settings/+page.svelte: - backend + + + + + + + + +