vectorize-io · nicoloboschi · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026 · Mar 25, 2026
diff --git a/hindsight-api-slim/hindsight_api/config.py b/hindsight-api-slim/hindsight_api/config.py
@@ -340,6 +340,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 ENV_REFLECT_MAX_CONTEXT_TOKENS = "HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS"
 ENV_REFLECT_WALL_TIMEOUT = "HINDSIGHT_API_REFLECT_WALL_TIMEOUT"
 ENV_REFLECT_MISSION = "HINDSIGHT_API_REFLECT_MISSION"
+ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS = "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS"
 
 # Disposition settings
 ENV_DISPOSITION_SKEPTICISM = "HINDSIGHT_API_DISPOSITION_SKEPTICISM"
@@ -503,6 +504,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
 DEFAULT_REFLECT_MAX_ITERATIONS = 10  # Max tool call iterations before forcing response
 DEFAULT_REFLECT_MAX_CONTEXT_TOKENS = 100_000  # Max accumulated context tokens before forcing final prompt
 DEFAULT_REFLECT_WALL_TIMEOUT = 300  # Wall-clock timeout in seconds for the entire reflect operation (5 minutes)
+DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS = -1  # Token budget for source facts in search_observations (-1 = disabled)
 
 # Disposition defaults (None = not set, fall back to bank DB value or 3)
 DEFAULT_DISPOSITION_SKEPTICISM = None
@@ -774,6 +776,7 @@ class HindsightConfig:
 
     # Reflect agent settings
     reflect_mission: str | None
+    reflect_source_facts_max_tokens: int
 
     # Disposition settings (hierarchical - can be overridden per bank; None = fall back to DB)
     disposition_skepticism: int | None
@@ -872,6 +875,7 @@ class HindsightConfig:
         "observations_mission",
         # Reflect settings
         "reflect_mission",
+        "reflect_source_facts_max_tokens",
         # Disposition settings
         "disposition_skepticism",
         "disposition_literalism",
@@ -1281,6 +1285,9 @@ def from_env(cls) -> "HindsightConfig":
             ),
             reflect_wall_timeout=int(os.getenv(ENV_REFLECT_WALL_TIMEOUT, str(DEFAULT_REFLECT_WALL_TIMEOUT))),
             reflect_mission=os.getenv(ENV_REFLECT_MISSION) or None,
+            reflect_source_facts_max_tokens=int(
+                os.getenv(ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS, str(DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS))
+            ),
             # Disposition settings (None = fall back to DB value)
             disposition_skepticism=int(os.getenv(ENV_DISPOSITION_SKEPTICISM))
             if os.getenv(ENV_DISPOSITION_SKEPTICISM)

diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -23,7 +23,7 @@
 import httpx
 import tiktoken
 
-from ..config import get_config
+from ..config import DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS, get_config
 from ..metrics import get_metrics_collector
 from ..tracing import create_operation_span
 from ..utils import mask_network_location
@@ -5262,6 +5262,12 @@ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any
                     pending_consolidation=pending_consolidation,
                 )
 
+        # Get reflect source facts config (hierarchical: env → tenant → bank)
+        config_dict = await self._config_resolver.get_bank_config(bank_id, request_context)
+        reflect_source_facts_max_tokens = config_dict.get(
+            "reflect_source_facts_max_tokens", DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS
+        )
+
         async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]:
             return await tool_search_observations(
                 self,
@@ -5274,6 +5280,7 @@ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, An
                 tag_groups=tag_groups,
                 last_consolidated_at=last_consolidated_at,
                 pending_consolidation=pending_consolidation,
+                source_facts_max_tokens=reflect_source_facts_max_tokens,
             )
 
         # Determine which tools to enable based on fact_types and exclude_mental_models

diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/tools.py b/hindsight-api-slim/hindsight_api/engine/reflect/tools.py
@@ -134,6 +134,7 @@ async def tool_search_observations(
     tag_groups: "list | None" = None,
     last_consolidated_at: datetime | None = None,
     pending_consolidation: int = 0,
+    source_facts_max_tokens: int = -1,
 ) -> dict[str, Any]:
     """
     Search consolidated observations using recall.
@@ -151,10 +152,16 @@ async def tool_search_observations(
         tags_match: How to match tags - "any" (OR), "all" (AND)
         last_consolidated_at: When consolidation last ran (for staleness check)
         pending_consolidation: Number of memories waiting to be consolidated
+        source_facts_max_tokens: Token budget for source facts (-1 = disabled, 0 = no limit, >0 = token limit)
 
     Returns:
         Dict with matching observations including freshness info and source memories
     """
+    include_source_facts = source_facts_max_tokens != -1
+    recall_kwargs: dict[str, Any] = {}
+    if include_source_facts and source_facts_max_tokens > 0:
+        recall_kwargs["max_source_facts_tokens"] = source_facts_max_tokens
+
     result = await memory_engine.recall_async(
         bank_id=bank_id,
         query=query,
@@ -165,9 +172,10 @@ async def tool_search_observations(
         tags=tags,
         tags_match=tags_match,
         tag_groups=tag_groups,
-        include_source_facts=False,
+        include_source_facts=include_source_facts,
         _connection_budget=1,
         _quiet=True,
+        **recall_kwargs,
     )
 
     is_stale = pending_consolidation > 0

diff --git a/hindsight-api-slim/tests/test_hierarchical_config.py b/hindsight-api-slim/tests/test_hierarchical_config.py
@@ -89,7 +89,7 @@ async def test_hierarchical_fields_categorization():
     assert "entity_labels" in configurable
 
     # Verify count is correct
-    assert len(configurable) == 19
+    assert len(configurable) == 20
 
     # Verify credential fields (NEVER exposed)
     assert "llm_api_key" in credentials
@@ -402,7 +402,7 @@ async def test_config_get_bank_config_no_static_or_credential_fields_leak(memory
             assert field in config, f"Expected configurable field '{field}' missing from config"
 
         # Should have a small number of configurable fields (not hundreds)
-        assert len(config) < 20, f"Too many fields returned: {len(config)}"
+        assert len(config) < 25, f"Too many fields returned: {len(config)}"
 
     finally:
         await memory.delete_bank(bank_id, request_context=request_context)

diff --git a/hindsight-api-slim/tests/test_reflect_source_facts_config.py b/hindsight-api-slim/tests/test_reflect_source_facts_config.py
@@ -0,0 +1,129 @@
+"""
+Tests for reflect search_observations source_facts_max_tokens configuration.
+
+Verifies that the source_facts_max_tokens parameter correctly controls
+whether source facts are included in search_observations recall calls.
+"""
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+from hindsight_api.engine.reflect.tools import tool_search_observations
+from hindsight_api.engine.response_models import RecallResult
+
+
+def _make_mock_engine(recall_result=None):
+    """Create a mock memory engine with a recall_async method."""
+    if recall_result is None:
+        recall_result = RecallResult(results=[], source_facts={})
+    engine = MagicMock()
+    engine.recall_async = AsyncMock(return_value=recall_result)
+    return engine
+
+
+@pytest.fixture
+def mock_request_context():
+    return MagicMock()
+
+
+class TestSearchObservationsSourceFacts:
+    """Test source_facts_max_tokens parameter in tool_search_observations."""
+
+    @pytest.mark.asyncio
+    async def test_default_disables_source_facts(self, mock_request_context):
+        """Default source_facts_max_tokens=-1 should disable source facts."""
+        engine = _make_mock_engine()
+
+        await tool_search_observations(
+            engine, "bank-1", "test query", mock_request_context
+        )
+
+        engine.recall_async.assert_called_once()
+        call_kwargs = engine.recall_async.call_args.kwargs
+        assert call_kwargs["include_source_facts"] is False
+        assert "max_source_facts_tokens" not in call_kwargs
+
+    @pytest.mark.asyncio
+    async def test_zero_enables_source_facts_unlimited(self, mock_request_context):
+        """source_facts_max_tokens=0 should enable source facts with no token limit."""
+        engine = _make_mock_engine()
+
+        await tool_search_observations(
+            engine, "bank-1", "test query", mock_request_context,
+            source_facts_max_tokens=0,
+        )
+
+        engine.recall_async.assert_called_once()
+        call_kwargs = engine.recall_async.call_args.kwargs
+        assert call_kwargs["include_source_facts"] is True
+        assert "max_source_facts_tokens" not in call_kwargs
+
+    @pytest.mark.asyncio
+    async def test_positive_enables_source_facts_with_limit(self, mock_request_context):
+        """source_facts_max_tokens>0 should enable source facts with a token budget."""
+        engine = _make_mock_engine()
+
+        await tool_search_observations(
+            engine, "bank-1", "test query", mock_request_context,
+            source_facts_max_tokens=5000,
+        )
+
+        engine.recall_async.assert_called_once()
+        call_kwargs = engine.recall_async.call_args.kwargs
+        assert call_kwargs["include_source_facts"] is True
+        assert call_kwargs["max_source_facts_tokens"] == 5000
+
+    @pytest.mark.asyncio
+    async def test_negative_one_disables_source_facts(self, mock_request_context):
+        """Explicit -1 should disable source facts (same as default)."""
+        engine = _make_mock_engine()
+
+        await tool_search_observations(
+            engine, "bank-1", "test query", mock_request_context,
+            source_facts_max_tokens=-1,
+        )
+
+        engine.recall_async.assert_called_once()
+        call_kwargs = engine.recall_async.call_args.kwargs
+        assert call_kwargs["include_source_facts"] is False
+        assert "max_source_facts_tokens" not in call_kwargs
+
+
+class TestReflectSourceFactsConfig:
+    """Test that reflect_source_facts_max_tokens is properly wired in HindsightConfig."""
+
+    def test_config_field_exists(self):
+        """reflect_source_facts_max_tokens should be a valid config field."""
+        from hindsight_api.config import HindsightConfig
+
+        import dataclasses
+
+        field_names = {f.name for f in dataclasses.fields(HindsightConfig)}
+        assert "reflect_source_facts_max_tokens" in field_names
+
+    def test_config_is_configurable(self):
+        """reflect_source_facts_max_tokens should be a configurable (per-bank) field."""
+        from hindsight_api.config import HindsightConfig
+
+        assert "reflect_source_facts_max_tokens" in HindsightConfig.get_configurable_fields()
+
+    def test_default_value_is_disabled(self):
+        """Default should be -1 (disabled)."""
+        from hindsight_api.config import DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS
+
+        assert DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS == -1
+
+    def test_env_var_constant_exists(self):
+        """Env var constant should be defined."""
+        from hindsight_api.config import ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS
+
+        assert ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS == "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS"
+
+    @patch.dict("os.environ", {"HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS": "8000"})
+    def test_from_env_reads_value(self):
+        """from_env should parse the env var."""
+        from hindsight_api.config import HindsightConfig
+
+        config = HindsightConfig.from_env()
+        assert config.reflect_source_facts_max_tokens == 8000
diff --git a/hindsight-docs/docs/developer/configuration.md b/hindsight-docs/docs/developer/configuration.md
@@ -912,6 +912,7 @@ export HINDSIGHT_API_OBSERVATIONS_MISSION="Observations are recurring patterns i
 | `HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS` | Max accumulated context tokens in the reflect loop before forcing final synthesis. Prevents `context_length_exceeded` errors on large banks. Lower this if your LLM has a context window smaller than 128K. | `100000` |
 | `HINDSIGHT_API_REFLECT_WALL_TIMEOUT` | Wall-clock timeout in seconds for the entire reflect operation. If exceeded, the request returns HTTP 504. | `300` |
 | `HINDSIGHT_API_REFLECT_MISSION` | Global reflect mission (identity and reasoning framing). Overridden per bank via config API. | - |
+| `HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS` | Token budget for source facts in `search_observations` during reflect. `-1` disables source facts (default), `0` enables with no limit, `>0` enables with a token budget. Hierarchical — can be overridden per bank via config API. | `-1` |
 
 #### Disposition
 

diff --git a/skills/hindsight-docs/references/developer/configuration.md b/skills/hindsight-docs/references/developer/configuration.md
@@ -912,6 +912,7 @@ export HINDSIGHT_API_OBSERVATIONS_MISSION="Observations are recurring patterns i
 | `HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS` | Max accumulated context tokens in the reflect loop before forcing final synthesis. Prevents `context_length_exceeded` errors on large banks. Lower this if your LLM has a context window smaller than 128K. | `100000` |
 | `HINDSIGHT_API_REFLECT_WALL_TIMEOUT` | Wall-clock timeout in seconds for the entire reflect operation. If exceeded, the request returns HTTP 504. | `300` |
 | `HINDSIGHT_API_REFLECT_MISSION` | Global reflect mission (identity and reasoning framing). Overridden per bank via config API. | - |
+| `HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS` | Token budget for source facts in `search_observations` during reflect. `-1` disables source facts (default), `0` enables with no limit, `>0` enables with a token budget. Hierarchical — can be overridden per bank via config API. | `-1` |
 
 #### Disposition