diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py index 15572805..9c0a63c6 100644 --- a/hindsight-api-slim/hindsight_api/config.py +++ b/hindsight-api-slim/hindsight_api/config.py @@ -340,6 +340,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: ENV_REFLECT_MAX_CONTEXT_TOKENS = "HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS" ENV_REFLECT_WALL_TIMEOUT = "HINDSIGHT_API_REFLECT_WALL_TIMEOUT" ENV_REFLECT_MISSION = "HINDSIGHT_API_REFLECT_MISSION" +ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS = "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS" # Disposition settings ENV_DISPOSITION_SKEPTICISM = "HINDSIGHT_API_DISPOSITION_SKEPTICISM" @@ -503,6 +504,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]: DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response DEFAULT_REFLECT_MAX_CONTEXT_TOKENS = 100_000 # Max accumulated context tokens before forcing final prompt DEFAULT_REFLECT_WALL_TIMEOUT = 300 # Wall-clock timeout in seconds for the entire reflect operation (5 minutes) +DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS = -1 # search_observations source facts: -1 = off, 0 = no limit, >0 = token cap # Disposition defaults (None = not set, fall back to bank DB value or 3) DEFAULT_DISPOSITION_SKEPTICISM = None @@ -774,6 +776,7 @@ class HindsightConfig: # Reflect agent settings reflect_mission: str | None + reflect_source_facts_max_tokens: int # Disposition settings (hierarchical - can be overridden per bank; None = fall back to DB) disposition_skepticism: int | None @@ -872,6 +875,7 @@ class HindsightConfig: "observations_mission", # Reflect settings "reflect_mission", + "reflect_source_facts_max_tokens", # Disposition settings "disposition_skepticism", "disposition_literalism", @@ -1281,6 +1285,9 @@ def from_env(cls) -> "HindsightConfig": ), reflect_wall_timeout=int(os.getenv(ENV_REFLECT_WALL_TIMEOUT, str(DEFAULT_REFLECT_WALL_TIMEOUT))), 
reflect_mission=os.getenv(ENV_REFLECT_MISSION) or None, + reflect_source_facts_max_tokens=int( + os.getenv(ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS, str(DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS)) + ), # Disposition settings (None = fall back to DB value) disposition_skepticism=int(os.getenv(ENV_DISPOSITION_SKEPTICISM)) if os.getenv(ENV_DISPOSITION_SKEPTICISM) diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 8b31ff4b..de3d8c6c 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -23,7 +23,7 @@ import httpx import tiktoken -from ..config import get_config +from ..config import DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS, get_config from ..metrics import get_metrics_collector from ..tracing import create_operation_span from ..utils import mask_network_location @@ -5262,6 +5262,12 @@ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any pending_consolidation=pending_consolidation, ) + # Resolve reflect source-facts token budget (hierarchical: env → tenant → bank; default if unset) + config_dict = await self._config_resolver.get_bank_config(bank_id, request_context) + reflect_source_facts_max_tokens = config_dict.get( + "reflect_source_facts_max_tokens", DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS + ) + async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]: return await tool_search_observations( self, @@ -5274,6 +5280,7 @@ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, An tag_groups=tag_groups, last_consolidated_at=last_consolidated_at, pending_consolidation=pending_consolidation, + source_facts_max_tokens=reflect_source_facts_max_tokens, ) # Determine which tools to enable based on fact_types and exclude_mental_models diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/tools.py b/hindsight-api-slim/hindsight_api/engine/reflect/tools.py index 
e0b092fe..e2add564 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/tools.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/tools.py @@ -134,6 +134,7 @@ async def tool_search_observations( tag_groups: "list | None" = None, last_consolidated_at: datetime | None = None, pending_consolidation: int = 0, + source_facts_max_tokens: int = -1, ) -> dict[str, Any]: """ Search consolidated observations using recall. @@ -151,10 +152,16 @@ async def tool_search_observations( tags_match: How to match tags - "any" (OR), "all" (AND) last_consolidated_at: When consolidation last ran (for staleness check) pending_consolidation: Number of memories waiting to be consolidated + source_facts_max_tokens: Token budget for source facts (negative = disabled, 0 = no limit, >0 = limit) Returns: Dict with matching observations including freshness info and source memories """ + include_source_facts = source_facts_max_tokens >= 0 # any negative value (not only -1) disables + recall_kwargs: dict[str, Any] = {} + if include_source_facts and source_facts_max_tokens > 0: + recall_kwargs["max_source_facts_tokens"] = source_facts_max_tokens + result = await memory_engine.recall_async( bank_id=bank_id, query=query, @@ -165,9 +172,10 @@ async def tool_search_observations( tags=tags, tags_match=tags_match, tag_groups=tag_groups, - include_source_facts=False, + include_source_facts=include_source_facts, _connection_budget=1, _quiet=True, + **recall_kwargs, ) is_stale = pending_consolidation > 0 diff --git a/hindsight-api-slim/tests/test_hierarchical_config.py b/hindsight-api-slim/tests/test_hierarchical_config.py index 4b254a79..203ea6ab 100644 --- a/hindsight-api-slim/tests/test_hierarchical_config.py +++ b/hindsight-api-slim/tests/test_hierarchical_config.py @@ -89,7 +89,7 @@ async def test_hierarchical_fields_categorization(): assert "entity_labels" in configurable # Verify count is correct - assert len(configurable) == 19 + assert len(configurable) == 20 # Verify credential fields (NEVER exposed) assert "llm_api_key" in 
credentials @@ -402,7 +402,7 @@ async def test_config_get_bank_config_no_static_or_credential_fields_leak(memory assert field in config, f"Expected configurable field '{field}' missing from config" # Should have a small number of configurable fields (not hundreds) - assert len(config) < 20, f"Too many fields returned: {len(config)}" + assert len(config) < 25, f"Too many fields returned: {len(config)}" finally: await memory.delete_bank(bank_id, request_context=request_context) diff --git a/hindsight-api-slim/tests/test_reflect_source_facts_config.py b/hindsight-api-slim/tests/test_reflect_source_facts_config.py new file mode 100644 index 00000000..e9097e3b --- /dev/null +++ b/hindsight-api-slim/tests/test_reflect_source_facts_config.py @@ -0,0 +1,129 @@ +""" +Tests for reflect search_observations source_facts_max_tokens configuration. + +Verifies that the source_facts_max_tokens parameter correctly controls +whether source facts are included in search_observations recall calls. +""" + +from unittest.mock import AsyncMock, MagicMock, patch + +import pytest + +from hindsight_api.engine.reflect.tools import tool_search_observations +from hindsight_api.engine.response_models import RecallResult + + +def _make_mock_engine(recall_result=None): + """Create a MagicMock engine whose async recall_async returns recall_result (empty by default).""" + if recall_result is None: + recall_result = RecallResult(results=[], source_facts={}) + engine = MagicMock() + engine.recall_async = AsyncMock(return_value=recall_result) + return engine + + +@pytest.fixture +def mock_request_context(): + return MagicMock() + + +class TestSearchObservationsSourceFacts: + """Test source_facts_max_tokens parameter in tool_search_observations.""" + + @pytest.mark.asyncio + async def test_default_disables_source_facts(self, mock_request_context): + """Default source_facts_max_tokens=-1 should disable source facts.""" + engine = _make_mock_engine() + + await tool_search_observations( + engine, "bank-1", "test query", mock_request_context + 
) + + engine.recall_async.assert_called_once() + call_kwargs = engine.recall_async.call_args.kwargs + assert call_kwargs["include_source_facts"] is False + assert "max_source_facts_tokens" not in call_kwargs + + @pytest.mark.asyncio + async def test_zero_enables_source_facts_unlimited(self, mock_request_context): + """source_facts_max_tokens=0 should enable source facts and forward no max_source_facts_tokens limit.""" + engine = _make_mock_engine() + + await tool_search_observations( + engine, "bank-1", "test query", mock_request_context, + source_facts_max_tokens=0, + ) + + engine.recall_async.assert_called_once() + call_kwargs = engine.recall_async.call_args.kwargs + assert call_kwargs["include_source_facts"] is True + assert "max_source_facts_tokens" not in call_kwargs + + @pytest.mark.asyncio + async def test_positive_enables_source_facts_with_limit(self, mock_request_context): + """source_facts_max_tokens>0 should enable source facts with a token budget.""" + engine = _make_mock_engine() + + await tool_search_observations( + engine, "bank-1", "test query", mock_request_context, + source_facts_max_tokens=5000, + ) + + engine.recall_async.assert_called_once() + call_kwargs = engine.recall_async.call_args.kwargs + assert call_kwargs["include_source_facts"] is True + assert call_kwargs["max_source_facts_tokens"] == 5000 + + @pytest.mark.asyncio + async def test_negative_one_disables_source_facts(self, mock_request_context): + """Explicit -1 should disable source facts (same as default).""" + engine = _make_mock_engine() + + await tool_search_observations( + engine, "bank-1", "test query", mock_request_context, + source_facts_max_tokens=-1, + ) + + engine.recall_async.assert_called_once() + call_kwargs = engine.recall_async.call_args.kwargs + assert call_kwargs["include_source_facts"] is False + assert "max_source_facts_tokens" not in call_kwargs + + +class TestReflectSourceFactsConfig: + """Test that reflect_source_facts_max_tokens is properly wired in HindsightConfig.""" + + def 
test_config_field_exists(self): + """reflect_source_facts_max_tokens should be a valid config field.""" + from hindsight_api.config import HindsightConfig + + import dataclasses + + field_names = {f.name for f in dataclasses.fields(HindsightConfig)} + assert "reflect_source_facts_max_tokens" in field_names + + def test_config_is_configurable(self): + """reflect_source_facts_max_tokens should be a configurable (per-bank) field.""" + from hindsight_api.config import HindsightConfig + + assert "reflect_source_facts_max_tokens" in HindsightConfig.get_configurable_fields() + + def test_default_value_is_disabled(self): + """Default should be -1 (disabled).""" + from hindsight_api.config import DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS + + assert DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS == -1 + + def test_env_var_constant_exists(self): + """Env var constant should be defined.""" + from hindsight_api.config import ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS + + assert ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS == "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS" + + @patch.dict("os.environ", {"HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS": "8000"}) + def test_from_env_reads_value(self): + """from_env should parse the env var into an int (8000 here).""" + from hindsight_api.config import HindsightConfig + + config = HindsightConfig.from_env() + assert config.reflect_source_facts_max_tokens == 8000 diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md index b2166696..6a14273a 100644 --- a/hindsight-docs/docs/developer/configuration.md +++ b/hindsight-docs/docs/developer/configuration.md @@ -912,6 +912,7 @@ export HINDSIGHT_API_OBSERVATIONS_MISSION="Observations are recurring patterns i | `HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS` | Max accumulated context tokens in the reflect loop before forcing final synthesis. Prevents `context_length_exceeded` errors on large banks. Lower this if your LLM has a context window smaller than 128K. 
| `100000` | | `HINDSIGHT_API_REFLECT_WALL_TIMEOUT` | Wall-clock timeout in seconds for the entire reflect operation. If exceeded, the request returns HTTP 504. | `300` | | `HINDSIGHT_API_REFLECT_MISSION` | Global reflect mission (identity and reasoning framing). Overridden per bank via config API. | - | +| `HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS` | Token budget for source facts in `search_observations` during reflect. `-1` disables source facts (default), `0` enables with no limit, `>0` enables with a token budget. Hierarchical — can be overridden per bank via config API. | `-1` | #### Disposition diff --git a/skills/hindsight-docs/references/developer/configuration.md b/skills/hindsight-docs/references/developer/configuration.md index 01429392..dd6ad06c 100644 --- a/skills/hindsight-docs/references/developer/configuration.md +++ b/skills/hindsight-docs/references/developer/configuration.md @@ -912,6 +912,7 @@ export HINDSIGHT_API_OBSERVATIONS_MISSION="Observations are recurring patterns i | `HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS` | Max accumulated context tokens in the reflect loop before forcing final synthesis. Prevents `context_length_exceeded` errors on large banks. Lower this if your LLM has a context window smaller than 128K. | `100000` | | `HINDSIGHT_API_REFLECT_WALL_TIMEOUT` | Wall-clock timeout in seconds for the entire reflect operation. If exceeded, the request returns HTTP 504. | `300` | | `HINDSIGHT_API_REFLECT_MISSION` | Global reflect mission (identity and reasoning framing). Overridden per bank via config API. | - | +| `HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS` | Token budget for source facts in `search_observations` during reflect. `-1` disables source facts (default), `0` enables with no limit, `>0` enables with a token budget. Hierarchical — can be overridden per bank via config API. | `-1` | #### Disposition