From 83f3627ab2b3f2063a7b74cb012ce01a1e5f0120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Wed, 25 Mar 2026 10:39:19 +0100 Subject: [PATCH 1/4] fix: return metadata in recall responses (#674) Metadata stored during retain was never retrieved during recall. Add metadata to all SQL SELECT queries, the RetrievalResult dataclass, ScoredResult.to_dict(), and MemoryFact construction in the recall pipeline. --- hindsight-api-slim/hindsight_api/engine/memory_engine.py | 4 +++- .../hindsight_api/engine/search/graph_retrieval.py | 2 +- .../hindsight_api/engine/search/mpfp_retrieval.py | 2 +- .../hindsight_api/engine/search/retrieval.py | 8 ++++---- hindsight-api-slim/hindsight_api/engine/search/types.py | 3 +++ 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py index 3300976af..8b31ff4b7 100644 --- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py +++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py @@ -3234,7 +3234,7 @@ def to_tuple_format(results): source_rows = await sf_conn.fetch( f""" SELECT id, text, fact_type, context, occurred_start, occurred_end, - mentioned_at, document_id, chunk_id, tags + mentioned_at, document_id, chunk_id, tags, metadata FROM {fq_table("memory_units")} WHERE id = ANY($1::uuid[]) """, @@ -3255,6 +3255,7 @@ def _make_source_fact(sid: str, r: Any) -> MemoryFact: occurred_end=r["occurred_end"].isoformat() if r["occurred_end"] else None, mentioned_at=r["mentioned_at"].isoformat() if r["mentioned_at"] else None, document_id=r["document_id"], + metadata=r["metadata"], chunk_id=str(r["chunk_id"]) if r["chunk_id"] else None, tags=r["tags"] or None, ) @@ -3333,6 +3334,7 @@ def _make_source_fact(sid: str, r: Any) -> MemoryFact: occurred_end=result_dict.get("occurred_end"), mentioned_at=result_dict.get("mentioned_at"), document_id=result_dict.get("document_id"), + 
metadata=result_dict.get("metadata"), chunk_id=result_dict.get("chunk_id"), tags=result_dict.get("tags"), source_fact_ids=source_fact_ids_by_obs.get(result_id) if include_source_facts else None, diff --git a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py index ce60963f4..7cdde9290 100644 --- a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py +++ b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py @@ -231,7 +231,7 @@ async def _retrieve_with_conn( f""" SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, - mu.document_id, mu.chunk_id, mu.tags, + mu.document_id, mu.chunk_id, mu.tags, mu.metadata, ml.weight, ml.link_type, ml.from_unit_id FROM {fq_table("memory_links")} ml JOIN {fq_table("memory_units")} mu ON ml.to_unit_id = mu.id diff --git a/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py index ad53914bc..e8d938cd2 100644 --- a/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py +++ b/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py @@ -449,7 +449,7 @@ async def fetch_memory_units_by_ids( rows = await conn.fetch( f""" SELECT id, text, context, event_date, occurred_start, occurred_end, - mentioned_at, fact_type, document_id, chunk_id, tags + mentioned_at, fact_type, document_id, chunk_id, tags, metadata FROM {fq_table("memory_units")} WHERE id = ANY($1::uuid[]) AND fact_type = $2 diff --git a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py index 403855d1f..bca52afdf 100644 --- a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py +++ b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py @@ -148,7 +148,7 @@ async def retrieve_semantic_bm25_combined( cols = ( "id, text, context, event_date, 
occurred_start, occurred_end, mentioned_at, " - "fact_type, document_id, chunk_id, tags" + "fact_type, document_id, chunk_id, tags, metadata" ) table = fq_table("memory_units") @@ -343,7 +343,7 @@ async def retrieve_temporal_combined( {groups_clause} ), sim_ranked AS ( - SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, + SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.metadata, 1 - (mu.embedding <=> $1::vector) AS similarity, ROW_NUMBER() OVER (PARTITION BY mu.fact_type ORDER BY mu.embedding <=> $1::vector) AS sim_rn FROM date_ranked dr @@ -351,7 +351,7 @@ async def retrieve_temporal_combined( WHERE dr.rn <= 50 AND (1 - (mu.embedding <=> $1::vector)) >= $6 ) - SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags, similarity + SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags, metadata, similarity FROM sim_ranked WHERE sim_rn <= 10 """, @@ -449,7 +449,7 @@ async def retrieve_temporal_combined( # bank_id on memory_units lets the planner use idx_memory_units_bank_fact_type. 
neighbors = await conn.fetch( f""" - SELECT src.from_unit_id, mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, + SELECT src.from_unit_id, mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.metadata, l.weight, l.link_type, 1 - (mu.embedding <=> $1::vector) AS similarity FROM unnest($2::uuid[]) AS src(from_unit_id) diff --git a/hindsight-api-slim/hindsight_api/engine/search/types.py b/hindsight-api-slim/hindsight_api/engine/search/types.py index 68913eabd..90edcf0ee 100644 --- a/hindsight-api-slim/hindsight_api/engine/search/types.py +++ b/hindsight-api-slim/hindsight_api/engine/search/types.py @@ -47,6 +47,7 @@ class RetrievalResult: document_id: str | None = None chunk_id: str | None = None tags: list[str] | None = None # Visibility scope tags + metadata: dict[str, str] | None = None # User-provided metadata # Retrieval-specific scores (only one will be set depending on retrieval method) similarity: float | None = None # Semantic retrieval @@ -70,6 +71,7 @@ def from_db_row(cls, row: dict[str, Any]) -> "RetrievalResult": document_id=row.get("document_id"), chunk_id=row.get("chunk_id"), tags=row.get("tags"), + metadata=row.get("metadata"), similarity=row.get("similarity"), bm25_score=row.get("bm25_score"), activation=row.get("activation"), @@ -153,6 +155,7 @@ def to_dict(self) -> dict[str, Any]: "document_id": self.retrieval.document_id, "chunk_id": self.retrieval.chunk_id, "tags": self.retrieval.tags, + "metadata": self.retrieval.metadata, "semantic_similarity": self.retrieval.similarity, "bm25_score": self.retrieval.bm25_score, } From 3f5644460e635bf1fe6fbb54e736f648e06a5212 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Wed, 25 Mar 2026 10:45:36 +0100 Subject: [PATCH 2/4] =?UTF-8?q?test:=20add=20metadata=20round-trip=20test?= 
=?UTF-8?q?=20for=20retain=E2=86=92recall?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace placeholder metadata test with one that actually passes metadata via retain_batch_async and asserts it is returned on recall. --- hindsight-api-slim/tests/test_retain.py | 34 +++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/hindsight-api-slim/tests/test_retain.py b/hindsight-api-slim/tests/test_retain.py index 1c4481cbb..de2e1dc82 100644 --- a/hindsight-api-slim/tests/test_retain.py +++ b/hindsight-api-slim/tests/test_retain.py @@ -814,34 +814,36 @@ async def test_context_with_batch(memory, request_context): @pytest.mark.asyncio async def test_metadata_storage_and_retrieval(memory, request_context): """ - Test that user-defined metadata is preserved. + Test that user-defined metadata passed during retain is returned on recall. Metadata allows arbitrary key-value data to be stored with facts. """ bank_id = f"test_metadata_{datetime.now(timezone.utc).timestamp()}" try: - # Store content with custom metadata custom_metadata = { "source": "slack", "channel": "engineering", "importance": "high", - "tags": "product,launch" } - # Note: retain_async doesn't directly support metadata parameter - # Metadata would need to be supported in the API layer - # For now, we test that the system handles content without errors - unit_ids = await memory.retain_async( + # Use retain_batch_async which supports the metadata parameter + unit_ids_list = await memory.retain_batch_async( bank_id=bank_id, - content="The product launch is scheduled for March 1st.", - context="planning meeting", - event_date=datetime(2024, 1, 15, tzinfo=timezone.utc), + contents=[ + { + "content": "The product launch is scheduled for March 1st.", + "context": "planning meeting", + "event_date": datetime(2024, 1, 15, tzinfo=timezone.utc), + "metadata": custom_metadata, + } + ], request_context=request_context, ) - assert len(unit_ids) 
> 0, "Should create memory units" + assert len(unit_ids_list) > 0, "Should create memory units" + assert len(unit_ids_list[0]) > 0, "Should have at least one unit ID" - # Recall to verify storage worked + # Recall and verify metadata is returned result = await memory.recall_async( bank_id=bank_id, query="When is the product launch?", @@ -853,8 +855,12 @@ async def test_metadata_storage_and_retrieval(memory, request_context): assert len(result.results) > 0, "Should recall stored facts" - print("✓ Successfully stored and retrieved facts") - print(" (Note: Metadata support depends on API implementation)") + # Verify metadata is present on recalled facts + fact = result.results[0] + assert fact.metadata is not None, "Metadata should not be null on recall" + assert fact.metadata.get("source") == "slack" + assert fact.metadata.get("channel") == "engineering" + assert fact.metadata.get("importance") == "high" finally: await memory.delete_bank(bank_id, request_context=request_context) From 39a94999fcba035f8d56399468a825dfcec530ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Wed, 25 Mar 2026 11:11:09 +0100 Subject: [PATCH 3/4] fix: parse metadata JSON string from database in MemoryFact asyncpg may return JSONB columns as strings. Add a field_validator to MemoryFact.metadata to handle JSON string deserialization. 
--- .../hindsight_api/engine/response_models.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/hindsight-api-slim/hindsight_api/engine/response_models.py b/hindsight-api-slim/hindsight_api/engine/response_models.py index f31d39c62..065bb4ffc 100644 --- a/hindsight-api-slim/hindsight_api/engine/response_models.py +++ b/hindsight-api-slim/hindsight_api/engine/response_models.py @@ -8,7 +8,7 @@ from typing import Any -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, field_validator # Valid fact types for recall operations (excludes 'opinion' which is deprecated) VALID_RECALL_FACT_TYPES = frozenset(["world", "experience", "observation"]) @@ -159,6 +159,19 @@ class MemoryFact(BaseModel): mentioned_at: str | None = Field(None, description="ISO format date when the fact was mentioned/learned") document_id: str | None = Field(None, description="ID of the document this memory belongs to") metadata: dict[str, str] | None = Field(None, description="User-defined metadata") + + @field_validator("metadata", mode="before") + @classmethod + def parse_metadata(cls, v: Any) -> dict[str, str] | None: + """Parse metadata from JSON string if needed (asyncpg may return JSONB as str).""" + if v is None: + return None + if isinstance(v, str): + import json + + return json.loads(v) + return v + chunk_id: str | None = Field( None, description="ID of the chunk this fact was extracted from (format: bank_id_document_id_chunk_index)" ) From aa53f0e58406a808ca0de27999b36339181c929d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= Date: Wed, 25 Mar 2026 17:57:22 +0100 Subject: [PATCH 4/4] fix: control plane UI fixes for recall and data view - Sanitize NaN cross-encoder scores to 0.0 in reranking pipeline (Pydantic serializes NaN as JSON null, breaking UI score display) - Add null-coalesce for score in search debug view to prevent crash - Switch data view text filter from debounced onChange to 
Enter key (avoids slow ILIKE queries on every keystroke for large banks) - Show loading spinner in search icon during filter requests - Preserve search/tag filters when clicking "Load more" --- .../hindsight_api/engine/search/reranking.py | 17 +++++-- .../src/components/data-view.tsx | 48 +++++++++---------- .../src/components/search-debug-view.tsx | 2 +- 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/hindsight-api-slim/hindsight_api/engine/search/reranking.py b/hindsight-api-slim/hindsight_api/engine/search/reranking.py index 0d948aa70..7d0c37cfe 100644 --- a/hindsight-api-slim/hindsight_api/engine/search/reranking.py +++ b/hindsight-api-slim/hindsight_api/engine/search/reranking.py @@ -153,6 +153,8 @@ async def rerank(self, query: str, candidates: list[MergedCandidate]) -> list[Sc # Normalize scores using sigmoid to [0, 1] range # Cross-encoder returns logits which can be negative + import math + import numpy as np def sigmoid(x): @@ -163,11 +165,20 @@ def sigmoid(x): # Create ScoredResult objects with cross-encoder scores scored_results = [] for candidate, raw_score, norm_score in zip(candidates, scores, normalized_scores): + # Sanitize NaN scores (cross-encoder can return NaN for certain inputs). + # NaN propagates through all downstream scoring and Pydantic serializes + # NaN as JSON null, which breaks clients expecting numeric values. 
+ raw = float(raw_score) + norm = float(norm_score) + if math.isnan(raw): + raw = 0.0 + if math.isnan(norm): + norm = 0.0 scored_result = ScoredResult( candidate=candidate, - cross_encoder_score=float(raw_score), - cross_encoder_score_normalized=float(norm_score), - weight=float(norm_score), # Initial weight is just cross-encoder score + cross_encoder_score=raw, + cross_encoder_score_normalized=norm, + weight=norm, # Initial weight is just cross-encoder score ) scored_results.append(scored_result) diff --git a/hindsight-control-plane/src/components/data-view.tsx b/hindsight-control-plane/src/components/data-view.tsx index 40b8d2c82..6d9922e30 100644 --- a/hindsight-control-plane/src/components/data-view.tsx +++ b/hindsight-control-plane/src/components/data-view.tsx @@ -227,29 +227,19 @@ export function DataView({ factType }: DataViewProps) { // Reset to first page when filters change useEffect(() => { setCurrentPage(1); - }, [searchQuery, tagFilters]); - - // Debounce ref for text search - const searchDebounceRef = useRef<ReturnType<typeof setTimeout> | null>(null); + }, [tagFilters]); - // Trigger server-side reload when text filter changes (debounced 300ms) - useEffect(() => { - if (searchDebounceRef.current) { - clearTimeout(searchDebounceRef.current); + // Trigger text search on Enter key + const executeSearch = () => { + if (currentBank) { + setCurrentPage(1); + loadData( + undefined, + searchQuery || undefined, + tagFilters.length > 0 ? tagFilters : undefined + ); } - searchDebounceRef.current = setTimeout(() => { - if (currentBank) { - loadData( - undefined, - searchQuery || undefined, - tagFilters.length > 0 ? tagFilters : undefined - ); - } - }, 300); - return () => { - if (searchDebounceRef.current) clearTimeout(searchDebounceRef.current); - }; - }, [searchQuery]); + }; // Trigger server-side reload immediately when tag filters change useEffect(() => { @@ -292,12 +282,22 @@ export function DataView({ factType }: DataViewProps) {
{/* Text search */}
- + {loading ? ( + + ) : ( + + )} setSearchQuery(e.target.value)} - placeholder="Filter by text or context..." + onKeyDown={(e) => { + if (e.key === "Enter") { + e.preventDefault(); + executeSearch(); + } + }} + placeholder="Filter by text or context (press Enter)..." className="pl-8 h-9" />
@@ -356,7 +356,7 @@ export function DataView({ factType }: DataViewProps) { onClick={() => { const newLimit = Math.min(data.total_units, fetchLimit + 1000); setFetchLimit(newLimit); - loadData(newLimit); + loadData(newLimit, searchQuery || undefined, tagFilters.length > 0 ? tagFilters : undefined); }} className="ml-2 text-primary hover:underline" > diff --git a/hindsight-control-plane/src/components/search-debug-view.tsx b/hindsight-control-plane/src/components/search-debug-view.tsx index f278bc421..293e57fec 100644 --- a/hindsight-control-plane/src/components/search-debug-view.tsx +++ b/hindsight-control-plane/src/components/search-debug-view.tsx @@ -411,7 +411,7 @@ export function SearchDebugView() {
-
{score.toFixed(3)}
+
{(score ?? 0).toFixed(3)}
score