Skip to content
This repository was archived by the owner on Mar 2, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion src/valence/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from __future__ import annotations

from pydantic import Field
from pydantic import Field, model_validator
from pydantic_settings import BaseSettings, SettingsConfigDict


Expand Down Expand Up @@ -229,6 +229,24 @@ class CoreSettings(BaseSettings):
# These are checked via os.environ in health.py

# ==========================================================================
# VALIDATORS
# ==========================================================================

@model_validator(mode="after")
def _auto_select_embedding_provider(self) -> CoreSettings:
"""Auto-select openai embedding provider when API key is available."""
# Only auto-select if no explicit provider was set (still default "local")
# and an OpenAI API key is available
if self.embedding_provider == "local" and self.openai_api_key:
import os

# Check if VALENCE_EMBEDDING_PROVIDER was explicitly set
if not os.environ.get("VALENCE_EMBEDDING_PROVIDER"):
Comment on lines +241 to +244
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The explicit-configuration check relies on os.environ.get("VALENCE_EMBEDDING_PROVIDER"), but BaseSettings values can come from non-os.environ sources (e.g., passing embedding_provider="local" to CoreSettings(...), or values loaded from an env_file in non-test runs). In those cases this validator can incorrectly override an explicit local setting to openai. Prefer detecting whether the field was provided via settings inputs (e.g., using self.model_fields_set to see if embedding_provider was set) rather than inspecting os.environ directly.

Suggested change
import os
# Check if VALENCE_EMBEDDING_PROVIDER was explicitly set
if not os.environ.get("VALENCE_EMBEDDING_PROVIDER"):
# Only auto-select when the embedding_provider field was *not*
# explicitly provided via any settings source.
if "embedding_provider" not in self.model_fields_set:

Copilot uses AI. Check for mistakes.
self.embedding_provider = "openai"
return self

# ==========================================================================
Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s a section divider comment indented under the validator after return self, which is unreachable and makes the file structure confusing. Consider removing it or unindenting it so the section headers align consistently with the rest of the module.

Suggested change
# ==========================================================================
# ==========================================================================

Copilot uses AI. Check for mistakes.

# COMPUTED PROPERTIES
# ==========================================================================

Expand Down
36 changes: 36 additions & 0 deletions tests/core/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,3 +383,39 @@ def test_int_coercion_seed_port(self, monkeypatch, clean_env):
settings = CoreSettings()
assert settings.seed_port == 9000
assert isinstance(settings.seed_port, int)


# ============================================================================
# Embedding Provider Auto-Detection (#72)
# ============================================================================


class TestEmbeddingProviderAutoDetect:
"""Test auto-detection of embedding provider based on available API keys."""

def test_autoselects_openai_when_key_available(self, monkeypatch, clean_env):
"""When OPENAI_API_KEY is set and VALENCE_EMBEDDING_PROVIDER is NOT set,
provider should auto-select to 'openai'."""
monkeypatch.setenv("OPENAI_API_KEY", "sk-test-autodetect-key")
monkeypatch.delenv("VALENCE_EMBEDDING_PROVIDER", raising=False)

settings = CoreSettings()
assert settings.embedding_provider == "openai"

def test_explicit_local_override_respected(self, monkeypatch, clean_env):
"""When VALENCE_EMBEDDING_PROVIDER is explicitly set to 'local',
it should stay 'local' even if OPENAI_API_KEY is set (user override)."""
monkeypatch.setenv("OPENAI_API_KEY", "sk-test-explicit-local")
monkeypatch.setenv("VALENCE_EMBEDDING_PROVIDER", "local")

settings = CoreSettings()
assert settings.embedding_provider == "local"

Copy link

Copilot AI Feb 28, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These tests cover env-var-based overrides, but don’t cover the common direct-Python usage where a caller might explicitly pass embedding_provider via CoreSettings(embedding_provider=...). Given the validator logic, adding a test that an explicit embedding_provider="local" (via kwargs) is not auto-overridden when OPENAI_API_KEY is set would help prevent regressions.

Suggested change
def test_kwarg_local_override_respected(self, monkeypatch, clean_env):
"""When embedding_provider='local' is passed via kwargs, it should stay
'local' even if OPENAI_API_KEY is set (direct Python override)."""
monkeypatch.setenv("OPENAI_API_KEY", "sk-test-kwarg-local")
monkeypatch.delenv("VALENCE_EMBEDDING_PROVIDER", raising=False)
settings = CoreSettings(embedding_provider="local")
assert settings.embedding_provider == "local"

Copilot uses AI. Check for mistakes.
def test_stays_local_when_no_key(self, monkeypatch, clean_env):
"""When neither OPENAI_API_KEY nor VALENCE_EMBEDDING_PROVIDER is set,
provider should remain the default 'local'."""
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
monkeypatch.delenv("VALENCE_EMBEDDING_PROVIDER", raising=False)

settings = CoreSettings()
assert settings.embedding_provider == "local"