From 83f3627ab2b3f2063a7b74cb012ce01a1e5f0120 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Wed, 25 Mar 2026 10:39:19 +0100
Subject: [PATCH 1/4] fix: return metadata in recall responses (#674)

Metadata stored during retain was never retrieved during recall.
Add metadata to all SQL SELECT queries, the RetrievalResult dataclass,
ScoredResult.to_dict(), and MemoryFact construction in the recall pipeline.
---
 hindsight-api-slim/hindsight_api/engine/memory_engine.py  | 4 +++-
 .../hindsight_api/engine/search/graph_retrieval.py        | 2 +-
 .../hindsight_api/engine/search/mpfp_retrieval.py         | 2 +-
 .../hindsight_api/engine/search/retrieval.py              | 8 ++++----
 hindsight-api-slim/hindsight_api/engine/search/types.py   | 3 +++
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/memory_engine.py b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
index 3300976af..8b31ff4b7 100644
--- a/hindsight-api-slim/hindsight_api/engine/memory_engine.py
+++ b/hindsight-api-slim/hindsight_api/engine/memory_engine.py
@@ -3234,7 +3234,7 @@ def to_tuple_format(results):
                             source_rows = await sf_conn.fetch(
                                 f"""
                                 SELECT id, text, fact_type, context, occurred_start, occurred_end,
-                                       mentioned_at, document_id, chunk_id, tags
+                                       mentioned_at, document_id, chunk_id, tags, metadata
                                 FROM {fq_table("memory_units")}
                                 WHERE id = ANY($1::uuid[])
                                 """,
@@ -3255,6 +3255,7 @@ def _make_source_fact(sid: str, r: Any) -> MemoryFact:
                                     occurred_end=r["occurred_end"].isoformat() if r["occurred_end"] else None,
                                     mentioned_at=r["mentioned_at"].isoformat() if r["mentioned_at"] else None,
                                     document_id=r["document_id"],
+                                    metadata=r["metadata"],
                                     chunk_id=str(r["chunk_id"]) if r["chunk_id"] else None,
                                     tags=r["tags"] or None,
                                 )
@@ -3333,6 +3334,7 @@ def _make_source_fact(sid: str, r: Any) -> MemoryFact:
                         occurred_end=result_dict.get("occurred_end"),
                         mentioned_at=result_dict.get("mentioned_at"),
                         document_id=result_dict.get("document_id"),
+                        metadata=result_dict.get("metadata"),
                         chunk_id=result_dict.get("chunk_id"),
                         tags=result_dict.get("tags"),
                         source_fact_ids=source_fact_ids_by_obs.get(result_id) if include_source_facts else None,
diff --git a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
index ce60963f4..7cdde9290 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/graph_retrieval.py
@@ -231,7 +231,7 @@ async def _retrieve_with_conn(
                     f"""
                     SELECT mu.id, mu.text, mu.context, mu.occurred_start, mu.occurred_end,
                            mu.mentioned_at, mu.fact_type,
-                           mu.document_id, mu.chunk_id, mu.tags,
+                           mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
                            ml.weight, ml.link_type, ml.from_unit_id
                     FROM {fq_table("memory_links")} ml
                     JOIN {fq_table("memory_units")} mu ON ml.to_unit_id = mu.id
diff --git a/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py
index ad53914bc..e8d938cd2 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/mpfp_retrieval.py
@@ -449,7 +449,7 @@ async def fetch_memory_units_by_ids(
         rows = await conn.fetch(
             f"""
             SELECT id, text, context, event_date, occurred_start, occurred_end,
-                   mentioned_at, fact_type, document_id, chunk_id, tags
+                   mentioned_at, fact_type, document_id, chunk_id, tags, metadata
             FROM {fq_table("memory_units")}
             WHERE id = ANY($1::uuid[])
               AND fact_type = $2
diff --git a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
index 403855d1f..bca52afdf 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/retrieval.py
@@ -148,7 +148,7 @@ async def retrieve_semantic_bm25_combined(
 
     cols = (
         "id, text, context, event_date, occurred_start, occurred_end, mentioned_at, "
-        "fact_type, document_id, chunk_id, tags"
+        "fact_type, document_id, chunk_id, tags, metadata"
     )
     table = fq_table("memory_units")
 
@@ -343,7 +343,7 @@ async def retrieve_temporal_combined(
               {groups_clause}
         ),
         sim_ranked AS (
-            SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+            SELECT mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
                    1 - (mu.embedding <=> $1::vector) AS similarity,
                    ROW_NUMBER() OVER (PARTITION BY mu.fact_type ORDER BY mu.embedding <=> $1::vector) AS sim_rn
             FROM date_ranked dr
@@ -351,7 +351,7 @@ async def retrieve_temporal_combined(
             WHERE dr.rn <= 50
               AND (1 - (mu.embedding <=> $1::vector)) >= $6
         )
-        SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags, similarity
+        SELECT id, text, context, event_date, occurred_start, occurred_end, mentioned_at, fact_type, document_id, chunk_id, tags, metadata, similarity
         FROM sim_ranked
         WHERE sim_rn <= 10
         """,
@@ -449,7 +449,7 @@ async def retrieve_temporal_combined(
             # bank_id on memory_units lets the planner use idx_memory_units_bank_fact_type.
             neighbors = await conn.fetch(
                 f"""
-                SELECT src.from_unit_id, mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags,
+                SELECT src.from_unit_id, mu.id, mu.text, mu.context, mu.event_date, mu.occurred_start, mu.occurred_end, mu.mentioned_at, mu.fact_type, mu.document_id, mu.chunk_id, mu.tags, mu.metadata,
                        l.weight, l.link_type,
                        1 - (mu.embedding <=> $1::vector) AS similarity
                 FROM unnest($2::uuid[]) AS src(from_unit_id)
diff --git a/hindsight-api-slim/hindsight_api/engine/search/types.py b/hindsight-api-slim/hindsight_api/engine/search/types.py
index 68913eabd..90edcf0ee 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/types.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/types.py
@@ -47,6 +47,7 @@ class RetrievalResult:
     document_id: str | None = None
     chunk_id: str | None = None
     tags: list[str] | None = None  # Visibility scope tags
+    metadata: dict[str, str] | None = None  # User-provided metadata
 
     # Retrieval-specific scores (only one will be set depending on retrieval method)
     similarity: float | None = None  # Semantic retrieval
@@ -70,6 +71,7 @@ def from_db_row(cls, row: dict[str, Any]) -> "RetrievalResult":
             document_id=row.get("document_id"),
             chunk_id=row.get("chunk_id"),
             tags=row.get("tags"),
+            metadata=row.get("metadata"),
             similarity=row.get("similarity"),
             bm25_score=row.get("bm25_score"),
             activation=row.get("activation"),
@@ -153,6 +155,7 @@ def to_dict(self) -> dict[str, Any]:
             "document_id": self.retrieval.document_id,
             "chunk_id": self.retrieval.chunk_id,
             "tags": self.retrieval.tags,
+            "metadata": self.retrieval.metadata,
             "semantic_similarity": self.retrieval.similarity,
             "bm25_score": self.retrieval.bm25_score,
         }

From 3f5644460e635bf1fe6fbb54e736f648e06a5212 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Wed, 25 Mar 2026 10:45:36 +0100
Subject: [PATCH 2/4] =?UTF-8?q?test:=20add=20metadata=20round-trip=20test?=
 =?UTF-8?q?=20for=20retain=E2=86=92recall?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace placeholder metadata test with one that actually passes
metadata via retain_batch_async and asserts it is returned on recall.
---
 hindsight-api-slim/tests/test_retain.py | 34 +++++++++++++++----------
 1 file changed, 20 insertions(+), 14 deletions(-)

diff --git a/hindsight-api-slim/tests/test_retain.py b/hindsight-api-slim/tests/test_retain.py
index 1c4481cbb..de2e1dc82 100644
--- a/hindsight-api-slim/tests/test_retain.py
+++ b/hindsight-api-slim/tests/test_retain.py
@@ -814,34 +814,36 @@ async def test_context_with_batch(memory, request_context):
 @pytest.mark.asyncio
 async def test_metadata_storage_and_retrieval(memory, request_context):
     """
-    Test that user-defined metadata is preserved.
+    Test that user-defined metadata passed during retain is returned on recall.
     Metadata allows arbitrary key-value data to be stored with facts.
     """
     bank_id = f"test_metadata_{datetime.now(timezone.utc).timestamp()}"
 
     try:
-        # Store content with custom metadata
         custom_metadata = {
             "source": "slack",
             "channel": "engineering",
             "importance": "high",
-            "tags": "product,launch"
         }
 
-        # Note: retain_async doesn't directly support metadata parameter
-        # Metadata would need to be supported in the API layer
-        # For now, we test that the system handles content without errors
-        unit_ids = await memory.retain_async(
+        # Use retain_batch_async which supports the metadata parameter
+        unit_ids_list = await memory.retain_batch_async(
             bank_id=bank_id,
-            content="The product launch is scheduled for March 1st.",
-            context="planning meeting",
-            event_date=datetime(2024, 1, 15, tzinfo=timezone.utc),
+            contents=[
+                {
+                    "content": "The product launch is scheduled for March 1st.",
+                    "context": "planning meeting",
+                    "event_date": datetime(2024, 1, 15, tzinfo=timezone.utc),
+                    "metadata": custom_metadata,
+                }
+            ],
             request_context=request_context,
         )
 
-        assert len(unit_ids) > 0, "Should create memory units"
+        assert len(unit_ids_list) > 0, "Should create memory units"
+        assert len(unit_ids_list[0]) > 0, "Should have at least one unit ID"
 
-        # Recall to verify storage worked
+        # Recall and verify metadata is returned
         result = await memory.recall_async(
             bank_id=bank_id,
             query="When is the product launch?",
@@ -853,8 +855,12 @@ async def test_metadata_storage_and_retrieval(memory, request_context):
 
         assert len(result.results) > 0, "Should recall stored facts"
 
-        print("✓ Successfully stored and retrieved facts")
-        print("  (Note: Metadata support depends on API implementation)")
+        # Verify metadata is present on recalled facts
+        fact = result.results[0]
+        assert fact.metadata is not None, "Metadata should not be null on recall"
+        assert fact.metadata.get("source") == "slack"
+        assert fact.metadata.get("channel") == "engineering"
+        assert fact.metadata.get("importance") == "high"
 
     finally:
         await memory.delete_bank(bank_id, request_context=request_context)

From 39a94999fcba035f8d56399468a825dfcec530ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Wed, 25 Mar 2026 11:11:09 +0100
Subject: [PATCH 3/4] fix: parse metadata JSON string from database in
 MemoryFact

asyncpg returns JSONB columns as JSON strings unless a type codec is
registered on the connection. Add a field_validator to
MemoryFact.metadata that deserializes such strings.
---
 .../hindsight_api/engine/response_models.py       | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/response_models.py b/hindsight-api-slim/hindsight_api/engine/response_models.py
index f31d39c62..065bb4ffc 100644
--- a/hindsight-api-slim/hindsight_api/engine/response_models.py
+++ b/hindsight-api-slim/hindsight_api/engine/response_models.py
@@ -8,7 +8,7 @@
 
 from typing import Any
 
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict, Field, field_validator
 
 # Valid fact types for recall operations (excludes 'opinion' which is deprecated)
 VALID_RECALL_FACT_TYPES = frozenset(["world", "experience", "observation"])
@@ -159,6 +159,19 @@ class MemoryFact(BaseModel):
     mentioned_at: str | None = Field(None, description="ISO format date when the fact was mentioned/learned")
     document_id: str | None = Field(None, description="ID of the document this memory belongs to")
     metadata: dict[str, str] | None = Field(None, description="User-defined metadata")
+
+    @field_validator("metadata", mode="before")
+    @classmethod
+    def parse_metadata(cls, v: Any) -> dict[str, str] | None:
+        """Decode JSON-string metadata (asyncpg may return JSONB as str); other values pass through."""
+        if v is None:
+            return None
+        if isinstance(v, str):
+            import json
+
+            return json.loads(v)  # malformed JSON raises json.JSONDecodeError (a ValueError)
+        return v  # not a string: returned unchanged
+
     chunk_id: str | None = Field(
         None, description="ID of the chunk this fact was extracted from (format: bank_id_document_id_chunk_index)"
     )

From aa53f0e58406a808ca0de27999b36339181c929d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nicol=C3=B2=20Boschi?= <boschi1997@gmail.com>
Date: Wed, 25 Mar 2026 17:57:22 +0100
Subject: [PATCH 4/4] fix: control plane UI fixes for recall and data view

- Sanitize NaN cross-encoder scores to 0.0 in reranking pipeline
  (Pydantic serializes NaN as JSON null, breaking UI score display)
- Add null-coalesce for score in search debug view to prevent crash
- Switch data view text filter from debounced onChange to Enter key
  (avoids slow ILIKE queries on every keystroke for large banks)
- Show loading spinner in search icon during filter requests
- Preserve search/tag filters when clicking "Load more"
---
 .../hindsight_api/engine/search/reranking.py  | 17 +++++--
 .../src/components/data-view.tsx              | 48 +++++++++----------
 .../src/components/search-debug-view.tsx      |  2 +-
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/hindsight-api-slim/hindsight_api/engine/search/reranking.py b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
index 0d948aa70..7d0c37cfe 100644
--- a/hindsight-api-slim/hindsight_api/engine/search/reranking.py
+++ b/hindsight-api-slim/hindsight_api/engine/search/reranking.py
@@ -153,6 +153,8 @@ async def rerank(self, query: str, candidates: list[MergedCandidate]) -> list[Sc
 
         # Normalize scores using sigmoid to [0, 1] range
         # Cross-encoder returns logits which can be negative
+        import math
+
         import numpy as np
 
         def sigmoid(x):
@@ -163,11 +165,20 @@ def sigmoid(x):
         # Create ScoredResult objects with cross-encoder scores
         scored_results = []
         for candidate, raw_score, norm_score in zip(candidates, scores, normalized_scores):
+            # Sanitize NaN scores (cross-encoder can return NaN for certain inputs).
+            # NaN propagates through all downstream scoring and Pydantic serializes
+            # NaN as JSON null, which breaks clients expecting numeric values.
+            raw = float(raw_score)
+            norm = float(norm_score)
+            if math.isnan(raw):
+                raw = 0.0
+            if math.isnan(norm):
+                norm = 0.0
             scored_result = ScoredResult(
                 candidate=candidate,
-                cross_encoder_score=float(raw_score),
-                cross_encoder_score_normalized=float(norm_score),
-                weight=float(norm_score),  # Initial weight is just cross-encoder score
+                cross_encoder_score=raw,
+                cross_encoder_score_normalized=norm,
+                weight=norm,  # Initial weight is just cross-encoder score
             )
             scored_results.append(scored_result)
 
diff --git a/hindsight-control-plane/src/components/data-view.tsx b/hindsight-control-plane/src/components/data-view.tsx
index 40b8d2c82..6d9922e30 100644
--- a/hindsight-control-plane/src/components/data-view.tsx
+++ b/hindsight-control-plane/src/components/data-view.tsx
@@ -227,29 +227,19 @@ export function DataView({ factType }: DataViewProps) {
   // Reset to first page when filters change
   useEffect(() => {
     setCurrentPage(1);
-  }, [searchQuery, tagFilters]);
-
-  // Debounce ref for text search
-  const searchDebounceRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+  }, [tagFilters]);
 
-  // Trigger server-side reload when text filter changes (debounced 300ms)
-  useEffect(() => {
-    if (searchDebounceRef.current) {
-      clearTimeout(searchDebounceRef.current);
+  // Trigger text search on Enter key
+  const executeSearch = () => {
+    if (currentBank) {
+      setCurrentPage(1);
+      loadData(
+        undefined,
+        searchQuery || undefined,
+        tagFilters.length > 0 ? tagFilters : undefined
+      );
     }
-    searchDebounceRef.current = setTimeout(() => {
-      if (currentBank) {
-        loadData(
-          undefined,
-          searchQuery || undefined,
-          tagFilters.length > 0 ? tagFilters : undefined
-        );
-      }
-    }, 300);
-    return () => {
-      if (searchDebounceRef.current) clearTimeout(searchDebounceRef.current);
-    };
-  }, [searchQuery]);
+  };
 
   // Trigger server-side reload immediately when tag filters change
   useEffect(() => {
@@ -292,12 +282,22 @@ export function DataView({ factType }: DataViewProps) {
             <div className="flex items-center gap-2">
               {/* Text search */}
               <div className="relative max-w-xs flex-1">
-                <Search className="absolute left-2.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground pointer-events-none" />
+                {loading ? (
+                  <RefreshCw className="absolute left-2.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground pointer-events-none animate-spin" />
+                ) : (
+                  <Search className="absolute left-2.5 top-1/2 -translate-y-1/2 h-4 w-4 text-muted-foreground pointer-events-none" />
+                )}
                 <Input
                   type="text"
                   value={searchQuery}
                   onChange={(e) => setSearchQuery(e.target.value)}
-                  placeholder="Filter by text or context..."
+                  onKeyDown={(e) => {
+                    if (e.key === "Enter") {
+                      e.preventDefault();
+                      executeSearch();
+                    }
+                  }}
+                  placeholder="Filter by text or context (press Enter)..."
                   className="pl-8 h-9"
                 />
               </div>
@@ -356,7 +356,7 @@ export function DataView({ factType }: DataViewProps) {
                       onClick={() => {
                         const newLimit = Math.min(data.total_units, fetchLimit + 1000);
                         setFetchLimit(newLimit);
-                        loadData(newLimit);
+                        loadData(newLimit, searchQuery || undefined, tagFilters.length > 0 ? tagFilters : undefined);
                       }}
                       className="ml-2 text-primary hover:underline"
                     >
diff --git a/hindsight-control-plane/src/components/search-debug-view.tsx b/hindsight-control-plane/src/components/search-debug-view.tsx
index f278bc421..293e57fec 100644
--- a/hindsight-control-plane/src/components/search-debug-view.tsx
+++ b/hindsight-control-plane/src/components/search-debug-view.tsx
@@ -411,7 +411,7 @@ export function SearchDebugView() {
                               </div>
                             </div>
                             <div className="flex-shrink-0 text-right">
-                              <div className="text-sm font-semibold">{score.toFixed(3)}</div>
+                              <div className="text-sm font-semibold">{(score ?? 0).toFixed(3)}</div>
                               <div className="text-xs text-muted-foreground">score</div>
                             </div>
                             <ChevronRight className="h-5 w-5 text-muted-foreground flex-shrink-0" />