Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions hindsight-api-slim/hindsight_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
ENV_REFLECT_MAX_CONTEXT_TOKENS = "HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS"
ENV_REFLECT_WALL_TIMEOUT = "HINDSIGHT_API_REFLECT_WALL_TIMEOUT"
ENV_REFLECT_MISSION = "HINDSIGHT_API_REFLECT_MISSION"
ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS = "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS"

# Disposition settings
ENV_DISPOSITION_SKEPTICISM = "HINDSIGHT_API_DISPOSITION_SKEPTICISM"
Expand Down Expand Up @@ -503,6 +504,7 @@ def normalize_config_dict(config: dict[str, Any]) -> dict[str, Any]:
DEFAULT_REFLECT_MAX_ITERATIONS = 10 # Max tool call iterations before forcing response
DEFAULT_REFLECT_MAX_CONTEXT_TOKENS = 100_000 # Max accumulated context tokens before forcing final prompt
DEFAULT_REFLECT_WALL_TIMEOUT = 300 # Wall-clock timeout in seconds for the entire reflect operation (5 minutes)
DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS = -1 # Token budget for source facts in search_observations (-1 = disabled)

# Disposition defaults (None = not set, fall back to bank DB value or 3)
DEFAULT_DISPOSITION_SKEPTICISM = None
Expand Down Expand Up @@ -774,6 +776,7 @@ class HindsightConfig:

# Reflect agent settings
reflect_mission: str | None
reflect_source_facts_max_tokens: int

# Disposition settings (hierarchical - can be overridden per bank; None = fall back to DB)
disposition_skepticism: int | None
Expand Down Expand Up @@ -872,6 +875,7 @@ class HindsightConfig:
"observations_mission",
# Reflect settings
"reflect_mission",
"reflect_source_facts_max_tokens",
# Disposition settings
"disposition_skepticism",
"disposition_literalism",
Expand Down Expand Up @@ -1281,6 +1285,9 @@ def from_env(cls) -> "HindsightConfig":
),
reflect_wall_timeout=int(os.getenv(ENV_REFLECT_WALL_TIMEOUT, str(DEFAULT_REFLECT_WALL_TIMEOUT))),
reflect_mission=os.getenv(ENV_REFLECT_MISSION) or None,
reflect_source_facts_max_tokens=int(
os.getenv(ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS, str(DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS))
),
# Disposition settings (None = fall back to DB value)
disposition_skepticism=int(os.getenv(ENV_DISPOSITION_SKEPTICISM))
if os.getenv(ENV_DISPOSITION_SKEPTICISM)
Expand Down
9 changes: 8 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/memory_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import httpx
import tiktoken

from ..config import get_config
from ..config import DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS, get_config
from ..metrics import get_metrics_collector
from ..tracing import create_operation_span
from ..utils import mask_network_location
Expand Down Expand Up @@ -5262,6 +5262,12 @@ async def search_mental_models_fn(q: str, max_results: int = 5) -> dict[str, Any
pending_consolidation=pending_consolidation,
)

# Get reflect source facts config (hierarchical: env → tenant → bank)
config_dict = await self._config_resolver.get_bank_config(bank_id, request_context)
reflect_source_facts_max_tokens = config_dict.get(
"reflect_source_facts_max_tokens", DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS
)

async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, Any]:
return await tool_search_observations(
self,
Expand All @@ -5274,6 +5280,7 @@ async def search_observations_fn(q: str, max_tokens: int = 5000) -> dict[str, An
tag_groups=tag_groups,
last_consolidated_at=last_consolidated_at,
pending_consolidation=pending_consolidation,
source_facts_max_tokens=reflect_source_facts_max_tokens,
)

# Determine which tools to enable based on fact_types and exclude_mental_models
Expand Down
10 changes: 9 additions & 1 deletion hindsight-api-slim/hindsight_api/engine/reflect/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ async def tool_search_observations(
tag_groups: "list | None" = None,
last_consolidated_at: datetime | None = None,
pending_consolidation: int = 0,
source_facts_max_tokens: int = -1,
) -> dict[str, Any]:
"""
Search consolidated observations using recall.
Expand All @@ -151,10 +152,16 @@ async def tool_search_observations(
tags_match: How to match tags - "any" (OR), "all" (AND)
last_consolidated_at: When consolidation last ran (for staleness check)
pending_consolidation: Number of memories waiting to be consolidated
source_facts_max_tokens: Token budget for source facts (-1 = disabled, 0 = enabled with no limit, >0 = enabled with that token limit)

Returns:
Dict with matching observations including freshness info and source memories
"""
include_source_facts = source_facts_max_tokens != -1
recall_kwargs: dict[str, Any] = {}
if include_source_facts and source_facts_max_tokens > 0:
recall_kwargs["max_source_facts_tokens"] = source_facts_max_tokens

result = await memory_engine.recall_async(
bank_id=bank_id,
query=query,
Expand All @@ -165,9 +172,10 @@ async def tool_search_observations(
tags=tags,
tags_match=tags_match,
tag_groups=tag_groups,
include_source_facts=False,
include_source_facts=include_source_facts,
_connection_budget=1,
_quiet=True,
**recall_kwargs,
)

is_stale = pending_consolidation > 0
Expand Down
4 changes: 2 additions & 2 deletions hindsight-api-slim/tests/test_hierarchical_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ async def test_hierarchical_fields_categorization():
assert "entity_labels" in configurable

# Verify count is correct
assert len(configurable) == 19
assert len(configurable) == 20

# Verify credential fields (NEVER exposed)
assert "llm_api_key" in credentials
Expand Down Expand Up @@ -402,7 +402,7 @@ async def test_config_get_bank_config_no_static_or_credential_fields_leak(memory
assert field in config, f"Expected configurable field '{field}' missing from config"

# Should have a small number of configurable fields (not hundreds)
assert len(config) < 20, f"Too many fields returned: {len(config)}"
assert len(config) < 25, f"Too many fields returned: {len(config)}"

finally:
await memory.delete_bank(bank_id, request_context=request_context)
Expand Down
129 changes: 129 additions & 0 deletions hindsight-api-slim/tests/test_reflect_source_facts_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
"""
Tests for reflect search_observations source_facts_max_tokens configuration.

Verifies that the source_facts_max_tokens parameter correctly controls
whether source facts are included in search_observations recall calls.
"""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest

from hindsight_api.engine.reflect.tools import tool_search_observations
from hindsight_api.engine.response_models import RecallResult


def _make_mock_engine(recall_result=None):
    """Build a stand-in memory engine whose ``recall_async`` can be awaited.

    Args:
        recall_result: Value the mocked ``recall_async`` resolves to. When
            omitted, an empty ``RecallResult`` (no results, no source facts)
            is used.

    Returns:
        A ``MagicMock`` with ``recall_async`` replaced by an ``AsyncMock``.
    """
    canned = RecallResult(results=[], source_facts={}) if recall_result is None else recall_result
    fake_engine = MagicMock()
    fake_engine.recall_async = AsyncMock(return_value=canned)
    return fake_engine


@pytest.fixture
def mock_request_context():
    """Provide a ``MagicMock`` to pass wherever a request context is expected."""
    request_context = MagicMock()
    return request_context


class TestSearchObservationsSourceFacts:
    """Exercise how ``source_facts_max_tokens`` shapes the ``recall_async`` call."""

    @staticmethod
    async def _recall_kwargs(request_context, **tool_kwargs):
        """Run ``tool_search_observations`` once and return what reached ``recall_async``.

        A fresh mock engine is created per call; the helper also asserts that
        ``recall_async`` was invoked exactly once before handing back the
        keyword arguments it received.
        """
        engine = _make_mock_engine()
        await tool_search_observations(engine, "bank-1", "test query", request_context, **tool_kwargs)
        engine.recall_async.assert_called_once()
        return engine.recall_async.call_args.kwargs

    @pytest.mark.asyncio
    async def test_default_disables_source_facts(self, mock_request_context):
        """Leaving source_facts_max_tokens at its default (-1) keeps source facts off."""
        passed = await self._recall_kwargs(mock_request_context)

        assert passed["include_source_facts"] is False
        assert "max_source_facts_tokens" not in passed

    @pytest.mark.asyncio
    async def test_zero_enables_source_facts_unlimited(self, mock_request_context):
        """A value of 0 turns source facts on without forwarding a token limit."""
        passed = await self._recall_kwargs(mock_request_context, source_facts_max_tokens=0)

        assert passed["include_source_facts"] is True
        assert "max_source_facts_tokens" not in passed

    @pytest.mark.asyncio
    async def test_positive_enables_source_facts_with_limit(self, mock_request_context):
        """A positive value turns source facts on and forwards it as the token budget."""
        passed = await self._recall_kwargs(mock_request_context, source_facts_max_tokens=5000)

        assert passed["include_source_facts"] is True
        assert passed["max_source_facts_tokens"] == 5000

    @pytest.mark.asyncio
    async def test_negative_one_disables_source_facts(self, mock_request_context):
        """Passing -1 explicitly behaves the same as the default: source facts off."""
        passed = await self._recall_kwargs(mock_request_context, source_facts_max_tokens=-1)

        assert passed["include_source_facts"] is False
        assert "max_source_facts_tokens" not in passed


class TestReflectSourceFactsConfig:
    """Check that reflect_source_facts_max_tokens is wired into HindsightConfig."""

    def test_config_field_exists(self):
        """HindsightConfig declares a reflect_source_facts_max_tokens dataclass field."""
        import dataclasses

        from hindsight_api.config import HindsightConfig

        declared = [field.name for field in dataclasses.fields(HindsightConfig)]
        assert "reflect_source_facts_max_tokens" in declared

    def test_config_is_configurable(self):
        """The field is listed among the configurable (per-bank) fields."""
        from hindsight_api.config import HindsightConfig

        configurable = HindsightConfig.get_configurable_fields()
        assert "reflect_source_facts_max_tokens" in configurable

    def test_default_value_is_disabled(self):
        """The module-level default is -1, i.e. source facts disabled."""
        from hindsight_api import config as config_module

        assert config_module.DEFAULT_REFLECT_SOURCE_FACTS_MAX_TOKENS == -1

    def test_env_var_constant_exists(self):
        """The env var name constant is defined with the expected value."""
        from hindsight_api import config as config_module

        expected_name = "HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS"
        assert config_module.ENV_REFLECT_SOURCE_FACTS_MAX_TOKENS == expected_name

    def test_from_env_reads_value(self):
        """from_env parses the env var into an int on the config object."""
        # Keep the import and the from_env() call inside the patched
        # environment, matching the scope a decorator-based patch would cover.
        with patch.dict("os.environ", {"HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS": "8000"}):
            from hindsight_api.config import HindsightConfig

            loaded = HindsightConfig.from_env()
            assert loaded.reflect_source_facts_max_tokens == 8000
1 change: 1 addition & 0 deletions hindsight-docs/docs/developer/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,7 @@ export HINDSIGHT_API_OBSERVATIONS_MISSION="Observations are recurring patterns i
| `HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS` | Max accumulated context tokens in the reflect loop before forcing final synthesis. Prevents `context_length_exceeded` errors on large banks. Lower this if your LLM has a context window smaller than 128K. | `100000` |
| `HINDSIGHT_API_REFLECT_WALL_TIMEOUT` | Wall-clock timeout in seconds for the entire reflect operation. If exceeded, the request returns HTTP 504. | `300` |
| `HINDSIGHT_API_REFLECT_MISSION` | Global reflect mission (identity and reasoning framing). Overridden per bank via config API. | - |
| `HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS` | Token budget for source facts in `search_observations` during reflect. `-1` disables source facts (default), `0` enables with no limit, `>0` enables with a token budget. Hierarchical — can be overridden per bank via config API. | `-1` |

#### Disposition

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -912,6 +912,7 @@ export HINDSIGHT_API_OBSERVATIONS_MISSION="Observations are recurring patterns i
| `HINDSIGHT_API_REFLECT_MAX_CONTEXT_TOKENS` | Max accumulated context tokens in the reflect loop before forcing final synthesis. Prevents `context_length_exceeded` errors on large banks. Lower this if your LLM has a context window smaller than 128K. | `100000` |
| `HINDSIGHT_API_REFLECT_WALL_TIMEOUT` | Wall-clock timeout in seconds for the entire reflect operation. If exceeded, the request returns HTTP 504. | `300` |
| `HINDSIGHT_API_REFLECT_MISSION` | Global reflect mission (identity and reasoning framing). Overridden per bank via config API. | - |
| `HINDSIGHT_API_REFLECT_SOURCE_FACTS_MAX_TOKENS` | Token budget for source facts in `search_observations` during reflect. `-1` disables source facts (default), `0` enables with no limit, `>0` enables with a token budget. Hierarchical — can be overridden per bank via config API. | `-1` |

#### Disposition

Expand Down
Loading