diff --git a/crates/ocm-daemon/src/bootstrap.rs b/crates/ocm-daemon/src/bootstrap.rs index 9591eae..88727b5 100644 --- a/crates/ocm-daemon/src/bootstrap.rs +++ b/crates/ocm-daemon/src/bootstrap.rs @@ -7,8 +7,9 @@ //! functionality and logs warnings. The user gets a Tauri tray + window //! that reports status; chat requests fail with clear errors. -use crate::settings::Settings; -use ocm_inference::selector; +use crate::settings::{Backend, Settings}; +use ocm_inference::ollama::DEFAULT_MODEL as DEFAULT_OLLAMA_MODEL; +use ocm_inference::selector::{self, BackendKind, DEFAULT_OLLAMA_BASE_URL}; use std::net::{IpAddr, Ipv4Addr, SocketAddr}; use std::sync::Arc; use std::time::Duration; @@ -60,6 +61,17 @@ async fn probe_url(base: &str, path: &str) -> bool { matches!(client.get(&url).send().await, Ok(r) if r.status().is_success()) } +/// Resolve `Settings.backend` to a concrete `BackendKind`. `Auto` delegates to +/// the existing platform detect; explicit settings win over detection. +fn resolve_backend_kind(setting: Backend) -> BackendKind { + match setting { + Backend::Auto => selector::detect_backend_kind(), + Backend::LlamaCpp => BackendKind::LlamaCpp, + Backend::Vllm => BackendKind::Vllm, + Backend::Ollama => BackendKind::Ollama, + } +} + /// Construct the full AppState given settings. 
pub fn build_app_state(settings: &Settings) -> ocm_api::AppState { let inference_url = settings @@ -70,8 +82,18 @@ pub fn build_app_state(settings: &Settings) -> ocm_api::AppState { .mem0_base_url .clone() .unwrap_or_else(|| DEFAULT_MEM0_BASE_URL.to_string()); + let ollama_url = settings + .ollama_base_url + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_BASE_URL.to_string()); + let ollama_model = settings + .ollama_model + .clone() + .unwrap_or_else(|| DEFAULT_OLLAMA_MODEL.to_string()); - let backend = selector::make_backend(inference_url); + let kind = resolve_backend_kind(settings.backend); + info!(backend = kind.as_str(), "selected inference backend"); + let backend = selector::make_backend_for_kind(kind, inference_url, ollama_url, ollama_model); let memory = Arc::new(ocm_memory::Mem0Client::new(memory_url, "ocm-default")); let backend: Arc = Arc::from(backend); @@ -132,7 +154,7 @@ pub async fn bootstrap(settings: Settings) { #[cfg(test)] mod tests { use super::*; - use crate::settings::Theme; + use crate::settings::{Backend, Theme}; fn test_settings() -> Settings { Settings { @@ -143,6 +165,9 @@ mod tests { inference_base_url: Some("http://127.0.0.1:18080".into()), mem0_base_url: Some("http://127.0.0.1:18765".into()), retrieval_top_k: Some(3), + backend: Backend::Auto, + ollama_base_url: None, + ollama_model: None, } } @@ -154,6 +179,7 @@ mod tests { let state = build_app_state(&s); assert_eq!(state.retrieval_top_k, DEFAULT_RETRIEVAL_TOP_K); // backend / memory clients are constructed; concrete name depends on platform + // (Auto never picks Ollama — it's opt-in). 
let backend_name = state.backend.name(); assert!(backend_name == "llama.cpp" || backend_name == "vLLM"); } @@ -165,6 +191,59 @@ mod tests { assert_eq!(state.retrieval_top_k, 3); } + #[test] + fn explicit_ollama_backend_is_wired_through_to_app_state() { + // The headline v0.1.1 wiring assertion: a user who selects backend = + // "ollama" in settings ends up with an Ollama InferenceBackend on the + // live AppState. Verified by the trait's `name()` ("Ollama" — see + // ocm_inference::ollama::Ollama::name). + let s = Settings { + backend: Backend::Ollama, + ollama_base_url: Some("http://127.0.0.1:11434".into()), + ollama_model: Some("llama3".into()), + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "Ollama"); + } + + #[test] + fn explicit_ollama_uses_defaults_when_fields_unset() { + // backend = "ollama" with no URL/model still produces a constructible + // Ollama backend — bootstrap fills in the daemon's native defaults + // (port 11434, the existing ollama::DEFAULT_MODEL). + let s = Settings { + backend: Backend::Ollama, + ollama_base_url: None, + ollama_model: None, + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "Ollama"); + } + + #[test] + fn explicit_llamacpp_overrides_platform_detect() { + // Users on a CUDA box who explicitly pick llama.cpp must get llama.cpp, + // even if auto-detect would have picked vLLM. 
+ let s = Settings { + backend: Backend::LlamaCpp, + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "llama.cpp"); + } + + #[test] + fn explicit_vllm_overrides_platform_detect() { + let s = Settings { + backend: Backend::Vllm, + ..Settings::default() + }; + let state = build_app_state(&s); + assert_eq!(state.backend.name(), "vLLM"); + } + #[tokio::test] async fn probe_url_returns_false_for_unreachable() { // Using port 1 (privileged, almost guaranteed not bound) on localhost diff --git a/crates/ocm-daemon/src/settings.rs b/crates/ocm-daemon/src/settings.rs index f4779a8..374fcdd 100644 --- a/crates/ocm-daemon/src/settings.rs +++ b/crates/ocm-daemon/src/settings.rs @@ -18,6 +18,22 @@ pub struct Settings { /// Number of memories to retrieve per chat turn. Default 5. Set to 0 to disable. #[serde(default)] pub retrieval_top_k: Option, + /// Which inference backend to use. `Auto` (default) preserves pre-v0.1.1 + /// behavior: platform-detect picks llama.cpp on Mac/Windows/CPU-Linux and + /// vLLM on CUDA Linux. Explicit `LlamaCpp` / `Vllm` / `Ollama` override + /// detection — the Ollama branch is the "I have an Ollama daemon already, + /// point OCM at it" zero-extra-process path. + #[serde(default)] + pub backend: Backend, + /// Override the Ollama daemon URL. Only consulted when `backend = "ollama"`. + /// Default (when unset) is `http://127.0.0.1:11434` — Ollama's installed default. + #[serde(default)] + pub ollama_base_url: Option, + /// Ollama model tag (e.g. `llama3`, `qwen2.5:7b`). REQUIRED by the Ollama + /// native API — there is no server-side default. When unset, bootstrap + /// falls back to `ocm_inference::ollama::DEFAULT_MODEL`. + #[serde(default)] + pub ollama_model: Option, } #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Copy)] @@ -28,6 +44,18 @@ pub enum Theme { System, } +/// Inference backend selection. Wire-format is lowercase TOML (`backend = "ollama"`). 
+#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Copy, Default)] +#[serde(rename_all = "lowercase")] +pub enum Backend { + /// Platform-detect (existing pre-v0.1.1 behavior). + #[default] + Auto, + LlamaCpp, + Vllm, + Ollama, +} + impl Default for Settings { fn default() -> Self { Self { @@ -38,6 +66,9 @@ impl Default for Settings { inference_base_url: None, mem0_base_url: None, retrieval_top_k: None, + backend: Backend::Auto, + ollama_base_url: None, + ollama_model: None, } } } @@ -83,4 +114,82 @@ mod tests { let s = Settings::load_or_default(&path).unwrap(); assert_eq!(s, Settings::default()); } + + #[test] + fn default_backend_is_auto() { + // Auto preserves the platform-detect behavior that shipped before this + // field existed; explicit selection (LlamaCpp / Vllm / Ollama) is opt-in. + let s = Settings::default(); + assert_eq!(s.backend, Backend::Auto); + assert_eq!(s.ollama_base_url, None); + assert_eq!(s.ollama_model, None); + } + + #[test] + fn backend_serializes_lowercase() { + // Enum values serialize lowercase (serde rename_all); matches Theme's serde shape. 
+ let raw = toml::to_string(&Settings { + backend: Backend::Ollama, + ..Settings::default() + }) + .unwrap(); + assert!(raw.contains("backend = \"ollama\"")); + } + + #[test] + fn ollama_settings_round_trip_via_toml() { + let dir = tempdir().unwrap(); + let path = dir.path().join("settings.toml"); + let original = Settings { + backend: Backend::Ollama, + ollama_base_url: Some("http://127.0.0.1:11434".into()), + ollama_model: Some("llama3".into()), + ..Settings::default() + }; + original.save(&path).unwrap(); + let loaded = Settings::load_or_default(&path).unwrap(); + assert_eq!(loaded, original); + assert_eq!(loaded.backend, Backend::Ollama); + assert_eq!( + loaded.ollama_base_url.as_deref(), + Some("http://127.0.0.1:11434") + ); + assert_eq!(loaded.ollama_model.as_deref(), Some("llama3")); + } + + #[test] + fn legacy_settings_toml_without_backend_field_still_parses() { + // Backward-compat: users with a settings.toml written before v0.1.1 + // (no `backend` key) must still load — the new field defaults to Auto. 
+ let dir = tempdir().unwrap(); + let path = dir.path().join("settings.toml"); + let legacy = r#" +api_port = 7300 +mcp_enabled = true +theme = "system" +"#; + std::fs::write(&path, legacy).unwrap(); + let loaded = Settings::load_or_default(&path).unwrap(); + assert_eq!(loaded.backend, Backend::Auto); + assert_eq!(loaded.ollama_base_url, None); + assert_eq!(loaded.ollama_model, None); + } + + #[test] + fn all_backend_variants_round_trip() { + for kind in [ + Backend::Auto, + Backend::LlamaCpp, + Backend::Vllm, + Backend::Ollama, + ] { + let s = Settings { + backend: kind, + ..Settings::default() + }; + let raw = toml::to_string(&s).unwrap(); + let back: Settings = toml::from_str(&raw).unwrap(); + assert_eq!(back.backend, kind, "round-trip failed for {kind:?}"); + } + } } diff --git a/crates/ocm-inference/src/selector.rs b/crates/ocm-inference/src/selector.rs index a11f023..5a3819f 100644 --- a/crates/ocm-inference/src/selector.rs +++ b/crates/ocm-inference/src/selector.rs @@ -6,13 +6,19 @@ //! - Linux without CUDA -> llama.cpp (CPU) //! - Windows -> llama.cpp //! - everything else -> llama.cpp (safest fallback) +//! +//! Ollama is **opt-in** (Settings.backend = "ollama"); it is never returned +//! from `detect_backend_kind()`. Auto-detect picks between the two backends +//! OCM can supervise itself; Ollama bridges to an *external* daemon and is +//! a deliberate user choice. -use crate::{llamacpp::LlamaCpp, vllm::Vllm, InferenceBackend}; +use crate::{llamacpp::LlamaCpp, ollama::Ollama, vllm::Vllm, InferenceBackend}; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BackendKind { LlamaCpp, Vllm, + Ollama, } impl BackendKind { @@ -20,10 +26,16 @@ impl BackendKind { match self { BackendKind::LlamaCpp => "llama.cpp", BackendKind::Vllm => "vLLM", + // Kept in lockstep with Ollama::name() so log/telemetry labels match. + BackendKind::Ollama => "Ollama", } } } +/// Native Ollama daemon default — the daemon binds 127.0.0.1:11434 out of the +/// box. 
Bootstrap falls back to this when Settings.ollama_base_url is unset. +pub const DEFAULT_OLLAMA_BASE_URL: &str = "http://127.0.0.1:11434"; + pub fn detect_backend_kind() -> BackendKind { if cfg!(target_os = "macos") { BackendKind::LlamaCpp @@ -47,10 +59,36 @@ fn has_cuda() -> bool { false } +/// Auto-detect-only constructor preserved for back-compat. Settings-driven +/// callers (the daemon's bootstrap) should use `make_backend_for_kind`. pub fn make_backend(base_url: String) -> Box { match detect_backend_kind() { BackendKind::Vllm => Box::new(Vllm::new(base_url)), BackendKind::LlamaCpp => Box::new(LlamaCpp::new(base_url)), + // detect_backend_kind never returns Ollama, but the match must be + // exhaustive — fall through to the safest local default. + BackendKind::Ollama => Box::new(LlamaCpp::new(base_url)), + } +} + +/// Settings-driven constructor: pick the backend by explicit `BackendKind`, +/// using each backend's own URL/model where applicable. +/// +/// The `inference_url` argument feeds llama.cpp / vLLM (they share the +/// OpenAI-compat HTTP wire format); `ollama_url` + `ollama_model` feed the +/// Ollama adapter. The two URLs are separate because a user can have an +/// Ollama daemon AND llama-server running on the same machine on different +/// ports; we don't want either's config to be shadowed by the other's. +pub fn make_backend_for_kind( + kind: BackendKind, + inference_url: String, + ollama_url: String, + ollama_model: String, +) -> Box { + match kind { + BackendKind::LlamaCpp => Box::new(LlamaCpp::new(inference_url)), + BackendKind::Vllm => Box::new(Vllm::new(inference_url)), + BackendKind::Ollama => Box::new(Ollama::new(ollama_url, ollama_model)), } } @@ -60,6 +98,8 @@ mod tests { #[test] fn detect_returns_one_of_two_kinds() { + // `detect_backend_kind()` is the AUTO-detect path; it never picks Ollama + // (Ollama is opt-in via Settings.backend, not platform-default). 
let kind = detect_backend_kind(); assert!(matches!(kind, BackendKind::LlamaCpp | BackendKind::Vllm)); } @@ -93,6 +133,7 @@ mod tests { let expected = match detect_backend_kind() { BackendKind::LlamaCpp => "llama.cpp", BackendKind::Vllm => "vLLM", + BackendKind::Ollama => "Ollama", }; assert_eq!(backend.name(), expected); } @@ -101,5 +142,48 @@ mod tests { fn backend_kind_as_str_matches_name() { assert_eq!(BackendKind::LlamaCpp.as_str(), "llama.cpp"); assert_eq!(BackendKind::Vllm.as_str(), "vLLM"); + // The Ollama adapter's InferenceBackend::name() returns "Ollama" — keep + // as_str() in lockstep so log/telemetry never disagree on labels. + assert_eq!(BackendKind::Ollama.as_str(), "Ollama"); + } + + #[test] + fn default_ollama_base_url_is_native_daemon_port() { + // 11434 is the Ollama daemon's installed default; if this ever changes + // upstream we want the test to force us to revisit the constant. + assert_eq!(DEFAULT_OLLAMA_BASE_URL, "http://127.0.0.1:11434"); + } + + #[test] + fn make_backend_for_kind_ollama_constructs_ollama_backend() { + let backend = make_backend_for_kind( + BackendKind::Ollama, + "http://127.0.0.1:8080".to_string(), + "http://127.0.0.1:11434".to_string(), + "llama3".to_string(), + ); + assert_eq!(backend.name(), "Ollama"); + } + + #[test] + fn make_backend_for_kind_llamacpp_ignores_ollama_args() { + let backend = make_backend_for_kind( + BackendKind::LlamaCpp, + "http://127.0.0.1:8080".to_string(), + "http://127.0.0.1:11434".to_string(), + "llama3".to_string(), + ); + assert_eq!(backend.name(), "llama.cpp"); + } + + #[test] + fn make_backend_for_kind_vllm_ignores_ollama_args() { + let backend = make_backend_for_kind( + BackendKind::Vllm, + "http://127.0.0.1:8000".to_string(), + "http://127.0.0.1:11434".to_string(), + "llama3".to_string(), + ); + assert_eq!(backend.name(), "vLLM"); } } diff --git a/frontend/src/lib/settings.ts b/frontend/src/lib/settings.ts index 8f89d56..8c810a8 100644 --- a/frontend/src/lib/settings.ts +++ 
b/frontend/src/lib/settings.ts @@ -3,6 +3,11 @@ export type Theme = 'dark' | 'light' | 'system'; +// Mirror of crate::settings::Backend (serde lowercase). `auto` preserves +// platform-detect (the pre-v0.1.1 default). `ollama` is the zero-extra-process +// path — point OCM at an already-running Ollama daemon. +export type Backend = 'auto' | 'llamacpp' | 'vllm' | 'ollama'; + export interface Settings { model_id: string | null; api_port: number; @@ -11,6 +16,9 @@ export interface Settings { inference_base_url: string | null; mem0_base_url: string | null; retrieval_top_k: number | null; + backend: Backend; + ollama_base_url: string | null; + ollama_model: string | null; } import { invoke } from './tauri'; diff --git a/frontend/src/routes/settings/+page.svelte b/frontend/src/routes/settings/+page.svelte index 24733ba..c3f8423 100644 --- a/frontend/src/routes/settings/+page.svelte +++ b/frontend/src/routes/settings/+page.svelte @@ -1,6 +1,12 @@
@@ -112,6 +127,17 @@ {/each} + + + + + + + + +