From 47eb5d18e3af5bab04bcb210b2df2768c5e3d71a Mon Sep 17 00:00:00 2001 From: anaslimem Date: Mon, 9 Mar 2026 03:08:25 +0100 Subject: [PATCH 1/6] refactored all the project from namespaces to collections --- CHANGELOG.md | 2 +- Cargo.lock | 4 +- crates/cortexadb-core/src/bin/manual_store.rs | 2 +- crates/cortexadb-core/src/bin/sync_bench.rs | 14 +- .../cortexadb-core/src/core/memory_entry.rs | 8 +- .../cortexadb-core/src/core/state_machine.rs | 34 ++-- crates/cortexadb-core/src/engine.rs | 14 +- crates/cortexadb-core/src/facade.rs | 80 ++++----- crates/cortexadb-core/src/index/graph.rs | 34 ++-- crates/cortexadb-core/src/index/vector.rs | 124 +++++++------- crates/cortexadb-core/src/query/executor.rs | 14 +- crates/cortexadb-core/src/query/hybrid.rs | 28 ++-- crates/cortexadb-core/src/storage/segment.rs | 2 +- .../src/storage/serialization.rs | 2 +- crates/cortexadb-core/src/store.rs | 18 +- crates/cortexadb-core/tests/integration.rs | 12 +- crates/cortexadb-py/.gitignore | 3 + crates/cortexadb-py/cortexadb/chunker.py | 3 +- crates/cortexadb-py/cortexadb/client.py | 20 +-- crates/cortexadb-py/cortexadb/loader.py | 3 +- .../cortexadb/providers/ollama.py | 2 +- crates/cortexadb-py/src/lib.rs | 11 +- crates/cortexadb-py/test_smoke.py | 71 ++++---- docs/api/python.md | 36 ++-- docs/api/rust.md | 22 +-- docs/content/docs/api/rust.mdx | 22 +-- docs/content/docs/guides/replay.mdx | 4 +- docs/getting-started/quickstart.md | 2 +- docs/guides/chunking.md | 4 +- docs/guides/collections.md | 157 ++++++++++++++++++ docs/guides/core-concepts.md | 6 +- docs/guides/namespaces.md | 157 ------------------ docs/guides/query-engine.md | 18 +- docs/guides/replay.md | 4 +- docs/index.md | 2 +- docs/resources/examples.md | 17 +- examples/rust/basic_usage.rs | 20 +-- 37 files changed, 484 insertions(+), 492 deletions(-) create mode 100644 docs/guides/collections.md delete mode 100644 docs/guides/namespaces.md diff --git a/CHANGELOG.md b/CHANGELOG.md index e500508..315d84e 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,7 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Python bindings via PyO3 and maturain. - Vector semantic search and Graph relationship support. - Temporal query boosting. -- Multi-agent namespaces. +- Multi-agent collections. ### Fixed - Python 3.14 build compatibility in CI. diff --git a/Cargo.lock b/Cargo.lock index 4da4e43..45cfc74 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -197,7 +197,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "cortexadb-core" -version = "0.1.7" +version = "0.1.8" dependencies = [ "arc-swap", "bincode", @@ -218,7 +218,7 @@ dependencies = [ [[package]] name = "cortexadb-py" -version = "0.1.7" +version = "0.1.8" dependencies = [ "cortexadb-core", "pyo3", diff --git a/crates/cortexadb-core/src/bin/manual_store.rs b/crates/cortexadb-core/src/bin/manual_store.rs index 896b4ec..fc853f2 100644 --- a/crates/cortexadb-core/src/bin/manual_store.rs +++ b/crates/cortexadb-core/src/bin/manual_store.rs @@ -49,7 +49,7 @@ fn main() -> Result<(), Box> { store.add_edge(MemoryId(1), MemoryId(2), "relates_to".to_string())?; let mut options = QueryOptions::with_top_k(5); - options.namespace = Some("agent1".to_string()); + options.collection = Some("agent1".to_string()); let out = store.query("rust", options, &DemoEmbedder)?; diff --git a/crates/cortexadb-core/src/bin/sync_bench.rs b/crates/cortexadb-core/src/bin/sync_bench.rs index 774194a..ea22446 100644 --- a/crates/cortexadb-core/src/bin/sync_bench.rs +++ b/crates/cortexadb-core/src/bin/sync_bench.rs @@ -24,7 +24,7 @@ fn main() -> Result<(), Box> { for i in 0..cfg.ops { let entry = MemoryEntry::new( MemoryId(i), - cfg.namespace.clone(), + cfg.collection.clone(), format!("bench_mem_{}", i).into_bytes(), 1_700_000_000 + i, ) @@ -76,7 +76,7 @@ fn main() -> Result<(), Box> { struct BenchConfig { ops: u64, vector_dim: usize, - namespace: String, + collection: String, 
data_dir: PathBuf, policy: SyncPolicy, } @@ -85,7 +85,7 @@ impl BenchConfig { fn from_args(args: Vec) -> Result> { let mut ops: u64 = 20_000; let mut vector_dim: usize = 3; - let mut namespace = "bench".to_string(); + let mut collection = "bench".to_string(); let mut data_dir = std::env::temp_dir().join("cortexadb_sync_bench"); let mut mode = "strict".to_string(); @@ -104,9 +104,9 @@ impl BenchConfig { i += 1; vector_dim = args.get(i).ok_or("missing value for --vector-dim")?.parse()?; } - "--namespace" => { + "--collection" => { i += 1; - namespace = args.get(i).ok_or("missing value for --namespace")?.clone(); + collection = args.get(i).ok_or("missing value for --collection")?.clone(); } "--data-dir" => { i += 1; @@ -151,7 +151,7 @@ impl BenchConfig { _ => return Err(format!("invalid mode: {} (use strict|batch|async)", mode).into()), }; - Ok(Self { ops, vector_dim, namespace, data_dir, policy }) + Ok(Self { ops, vector_dim, collection, data_dir, policy }) } } @@ -169,7 +169,7 @@ fn print_help() { --mode strict|batch|async\n\ --ops (default: 20000)\n\ --vector-dim (default: 3)\n\ - --namespace (default: bench)\n\ + --collection (default: bench)\n\ --data-dir (default: /tmp/cortexadb_sync_bench)\n\ --batch-max-ops (default: 64)\n\ --batch-max-delay-ms (default: 25)\n\ diff --git a/crates/cortexadb-core/src/core/memory_entry.rs b/crates/cortexadb-core/src/core/memory_entry.rs index 1e4638f..8fe5c24 100644 --- a/crates/cortexadb-core/src/core/memory_entry.rs +++ b/crates/cortexadb-core/src/core/memory_entry.rs @@ -9,7 +9,7 @@ pub struct MemoryId(pub u64); #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct MemoryEntry { pub id: MemoryId, - pub namespace: String, + pub collection: String, pub content: Vec, pub embedding: Option>, pub metadata: HashMap, @@ -18,10 +18,10 @@ pub struct MemoryEntry { } impl MemoryEntry { - pub fn new(id: MemoryId, namespace: String, content: Vec, created_at: u64) -> Self { + pub fn new(id: MemoryId, collection: String, 
content: Vec, created_at: u64) -> Self { Self { id, - namespace, + collection, content, embedding: None, metadata: HashMap::new(), @@ -50,7 +50,7 @@ mod tests { let entry = MemoryEntry::new(MemoryId(1), "default".to_string(), b"test content".to_vec(), 1000); assert_eq!(entry.id, MemoryId(1)); - assert_eq!(entry.namespace, "default"); + assert_eq!(entry.collection, "default"); assert_eq!(entry.importance, 0.0); assert_eq!(entry.embedding, None); } diff --git a/crates/cortexadb-core/src/core/state_machine.rs b/crates/cortexadb-core/src/core/state_machine.rs index 49fec6c..1424929 100644 --- a/crates/cortexadb-core/src/core/state_machine.rs +++ b/crates/cortexadb-core/src/core/state_machine.rs @@ -11,8 +11,8 @@ pub enum StateMachineError { MemoryNotFound(MemoryId), #[error("Invalid state: {0}")] InvalidState(String), - #[error("Cross-namespace edge is not allowed: from={from:?} ({from_ns}) to={to:?} ({to_ns})")] - CrossNamespaceEdge { from: MemoryId, from_ns: String, to: MemoryId, to_ns: String }, + #[error("Cross-collection edge is not allowed: from={from:?} ({from_col}) to={to:?} ({to_col})")] + CrossCollectionEdge { from: MemoryId, from_col: String, to: MemoryId, to_col: String }, } pub type Result = std::result::Result; @@ -109,12 +109,12 @@ impl StateMachine { let from_entry = self.memories.get(&from).ok_or(StateMachineError::MemoryNotFound(from))?; let to_entry = self.memories.get(&to).ok_or(StateMachineError::MemoryNotFound(to))?; - if from_entry.namespace != to_entry.namespace { - return Err(StateMachineError::CrossNamespaceEdge { + if from_entry.collection != to_entry.collection { + return Err(StateMachineError::CrossCollectionEdge { from, - from_ns: from_entry.namespace.clone(), + from_col: from_entry.collection.clone(), to, - to_ns: to_entry.namespace.clone(), + to_col: to_entry.collection.clone(), }); } @@ -129,10 +129,10 @@ impl StateMachine { Ok(()) } - pub fn namespace_of(&self, id: MemoryId) -> Result<&str> { + pub fn collection_of(&self, id: MemoryId) 
-> Result<&str> { self.memories .get(&id) - .map(|e| e.namespace.as_str()) + .map(|e| e.collection.as_str()) .ok_or(StateMachineError::MemoryNotFound(id)) } @@ -149,10 +149,10 @@ impl StateMachine { self.memories.get(&id).ok_or(StateMachineError::MemoryNotFound(id)) } - /// Get all memories in a namespace - pub fn get_memories_in_namespace(&self, namespace: &str) -> Vec<&MemoryEntry> { + /// Get all memories in a collection + pub fn get_memories_in_collection(&self, collection: &str) -> Vec<&MemoryEntry> { let mut entries: Vec<_> = - self.memories.values().filter(|e| e.namespace == namespace).collect(); + self.memories.values().filter(|e| e.collection == collection).collect(); entries.sort_by_key(|e| e.id); entries } @@ -210,10 +210,10 @@ impl Default for StateMachine { mod tests { use super::*; - fn create_test_entry(id: u64, namespace: &str, timestamp: u64) -> MemoryEntry { + fn create_test_entry(id: u64, collection: &str, timestamp: u64) -> MemoryEntry { MemoryEntry::new( MemoryId(id), - namespace.to_string(), + collection.to_string(), format!("content_{}", id).into_bytes(), timestamp, ) @@ -309,13 +309,13 @@ mod tests { } #[test] - fn test_namespace_filtering() { + fn test_collection_filtering() { let mut sm = StateMachine::new(); sm.insert_memory(create_test_entry(1, "ns1", 1000)).unwrap(); sm.insert_memory(create_test_entry(2, "ns2", 1000)).unwrap(); sm.insert_memory(create_test_entry(3, "ns1", 1000)).unwrap(); - let ns1_entries = sm.get_memories_in_namespace("ns1"); + let ns1_entries = sm.get_memories_in_collection("ns1"); assert_eq!(ns1_entries.len(), 2); assert_eq!(ns1_entries[0].id, MemoryId(1)); assert_eq!(ns1_entries[1].id, MemoryId(3)); @@ -373,12 +373,12 @@ mod tests { } #[test] - fn test_cross_namespace_edge_rejected() { + fn test_cross_collection_edge_rejected() { let mut sm = StateMachine::new(); sm.insert_memory(create_test_entry(1, "ns1", 1000)).unwrap(); sm.insert_memory(create_test_entry(2, "ns2", 1000)).unwrap(); let result = 
sm.add_edge(MemoryId(1), MemoryId(2), "bad".to_string()); - assert!(matches!(result, Err(StateMachineError::CrossNamespaceEdge { .. }))); + assert!(matches!(result, Err(StateMachineError::CrossCollectionEdge { .. }))); } } diff --git a/crates/cortexadb-core/src/engine.rs b/crates/cortexadb-core/src/engine.rs index 1b98ab7..645b4d5 100644 --- a/crates/cortexadb-core/src/engine.rs +++ b/crates/cortexadb-core/src/engine.rs @@ -408,12 +408,12 @@ impl Engine { } fn estimate_memory_bytes(entry: &MemoryEntry) -> u64 { - let namespace_bytes = entry.namespace.len() as u64; + let collection_bytes = entry.collection.len() as u64; let content_bytes = entry.content.len() as u64; let embedding_bytes = entry.embedding.as_ref().map(|v| (v.len() as u64) * 4).unwrap_or(0); let metadata_bytes: u64 = entry.metadata.iter().map(|(k, v)| (k.len() + v.len()) as u64).sum(); - namespace_bytes + content_bytes + embedding_bytes + metadata_bytes + collection_bytes + content_bytes + embedding_bytes + metadata_bytes } /// Helper: Write entry to segments @@ -524,11 +524,11 @@ mod tests { // Lower importance + older entries should be evicted first. 
let entries = vec![ - MemoryEntry::new(MemoryId(1), "ns".to_string(), b"a".to_vec(), 1000) + MemoryEntry::new(MemoryId(1), "col".to_string(), b"a".to_vec(), 1000) .with_importance(0.1), - MemoryEntry::new(MemoryId(2), "ns".to_string(), b"b".to_vec(), 2000) + MemoryEntry::new(MemoryId(2), "col".to_string(), b"b".to_vec(), 2000) .with_importance(0.9), - MemoryEntry::new(MemoryId(3), "ns".to_string(), b"c".to_vec(), 3000) + MemoryEntry::new(MemoryId(3), "col".to_string(), b"c".to_vec(), 3000) .with_importance(0.2), ]; for entry in entries { @@ -744,7 +744,7 @@ mod tests { for i in 0..5 { let entry = MemoryEntry::new( MemoryId(i as u64), - "namespace".to_string(), + "collection".to_string(), format!("memory_{}", i).into_bytes(), 2000 + i as u64, ); @@ -764,7 +764,7 @@ mod tests { // Verify data is intact let memory = engine.get_state_machine().get_memory(MemoryId(0)).unwrap(); assert_eq!(memory.id, MemoryId(0)); - assert_eq!(memory.namespace, "namespace"); + assert_eq!(memory.collection, "collection"); } #[test] diff --git a/crates/cortexadb-core/src/facade.rs b/crates/cortexadb-core/src/facade.rs index eeddcf9..5700eec 100644 --- a/crates/cortexadb-core/src/facade.rs +++ b/crates/cortexadb-core/src/facade.rs @@ -31,7 +31,7 @@ pub struct Hit { pub struct Memory { pub id: u64, pub content: Vec, - pub namespace: String, + pub collection: String, pub embedding: Option>, pub metadata: HashMap, pub created_at: u64, @@ -187,7 +187,7 @@ pub struct CortexaDB { /// A record for batch insertion. #[derive(Debug, Clone)] pub struct BatchRecord { - pub namespace: String, + pub collection: String, pub content: Vec, pub embedding: Option>, pub metadata: Option>, @@ -254,7 +254,7 @@ impl CortexaDB { /// Store a new memory with the given embedding and optional metadata. /// - /// The memory is placed in the "default" namespace. + /// The memory is placed in the "default" collection. 
/// /// # Examples /// @@ -278,13 +278,13 @@ impl CortexaDB { embedding: Vec, metadata: Option>, ) -> Result { - self.remember_in_namespace("default", embedding, metadata) + self.remember_in_collection("default", embedding, metadata) } - /// Store a new memory in a specific namespace. - pub fn remember_in_namespace( + /// Store a new memory in a specific collection. + pub fn remember_in_collection( &self, - namespace: &str, + collection: &str, embedding: Vec, metadata: Option>, ) -> Result { @@ -292,7 +292,7 @@ impl CortexaDB { let ts = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs(); let mut entry = - MemoryEntry::new(id, namespace.to_string(), Vec::new(), ts).with_embedding(embedding); + MemoryEntry::new(id, collection.to_string(), Vec::new(), ts).with_embedding(embedding); if let Some(meta) = metadata { entry.metadata = meta; } @@ -301,10 +301,10 @@ impl CortexaDB { Ok(id.0) } - /// Store a memory with explicit content bytes optionally in a namespace. + /// Store a memory with explicit content bytes optionally in a collection. 
pub fn remember_with_content( &self, - namespace: &str, + collection: &str, content: Vec, embedding: Vec, metadata: Option>, @@ -313,7 +313,7 @@ impl CortexaDB { let ts = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs(); let mut entry = - MemoryEntry::new(id, namespace.to_string(), content, ts).with_embedding(embedding); + MemoryEntry::new(id, collection.to_string(), content, ts).with_embedding(embedding); if let Some(meta) = metadata { entry.metadata = meta; } @@ -330,7 +330,7 @@ impl CortexaDB { for rec in records { let id = MemoryId(self.next_id.fetch_add(1, std::sync::atomic::Ordering::Relaxed)); - let mut entry = MemoryEntry::new(id.clone(), rec.namespace, rec.content, ts); + let mut entry = MemoryEntry::new(id.clone(), rec.collection, rec.content, ts); if let Some(emb) = rec.embedding { entry = entry.with_embedding(emb); } @@ -379,35 +379,35 @@ impl CortexaDB { Ok(neighbors.into_iter().map(|(target_id, relation)| (target_id.0, relation)).collect()) } - /// Query the database scoped to a specific namespace. + /// Query the database scoped to a specific collection. /// - /// Over-fetches by 4× top_k globally, then filters by namespace and + /// Over-fetches by 4× top_k globally, then filters by collection and /// returns the top *top_k* results. This avoids a separate index per - /// namespace while keeping the filter inside Rust (no GIL round-trips). - pub fn ask_in_namespace( + /// collection while keeping the filter inside Rust (no GIL round-trips). 
+ pub fn ask_in_collection( &self, - namespace: &str, + collection: &str, query_embedding: Vec, top_k: usize, metadata_filter: Option>, ) -> Result> { let embedder = StaticEmbedder { embedding: query_embedding }; let mut options = QueryOptions::with_top_k(top_k.saturating_mul(4).max(top_k)); - options.namespace = Some(namespace.to_string()); + options.collection = Some(collection.to_string()); options.metadata_filter = metadata_filter; let execution = self.inner.query("", options, &embedder)?; let sm = self.inner.state_machine(); let memories = sm.all_memories(); - // Build a lookup: MemoryId → namespace - let ns_map: std::collections::HashMap = - memories.iter().map(|m| (m.id.0, m.namespace.as_str())).collect(); + // Build a lookup: MemoryId → collection + let col_map: std::collections::HashMap = + memories.iter().map(|m| (m.id.0, m.collection.as_str())).collect(); let results: Vec = execution .hits .into_iter() - .filter(|hit| ns_map.get(&hit.id.0).copied() == Some(namespace)) + .filter(|hit| col_map.get(&hit.id.0).copied() == Some(collection)) .take(top_k) .map(|hit| Hit { id: hit.id.0, score: hit.final_score }) .collect(); @@ -426,7 +426,7 @@ impl CortexaDB { Ok(Memory { id: entry.id.0, content: entry.content.clone(), - namespace: entry.namespace.clone(), + collection: entry.collection.clone(), embedding: entry.embedding.clone(), metadata: entry.metadata.clone(), created_at: entry.created_at, @@ -623,19 +623,19 @@ mod tests { } #[test] - fn test_namespace_support() { + fn test_collection_support() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id1 = db.remember_in_namespace("agent_b", vec![0.0, 1.0, 0.0], None).unwrap(); - let _id2 = db.remember_in_namespace("agent_c", vec![0.0, 0.0, 1.0], None).unwrap(); + let id1 = db.remember_in_collection("agent_b", vec![0.0, 1.0, 0.0], None).unwrap(); + let _id2 = db.remember_in_collection("agent_c", vec![0.0, 0.0, 1.0], 
None).unwrap(); let stats = db.stats(); assert_eq!(stats.entries, 2); let m1 = db.get_memory(id1).unwrap(); - assert_eq!(m1.namespace, "agent_b"); + assert_eq!(m1.collection, "agent_b"); } #[test] @@ -721,16 +721,16 @@ mod tests { } #[test] - fn test_ask_in_namespace_only_returns_own_namespace() { + fn test_ask_in_collection_only_returns_own_collection() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - // Same embedding direction — only namespace should differentiate results. - let id_a = db.remember_in_namespace("ns_a", vec![1.0, 0.0, 0.0], None).unwrap(); - let _id_b = db.remember_in_namespace("ns_b", vec![1.0, 0.0, 0.0], None).unwrap(); + // Same embedding direction — only collection should differentiate results. + let id_a = db.remember_in_collection("ns_a", vec![1.0, 0.0, 0.0], None).unwrap(); + let _id_b = db.remember_in_collection("ns_b", vec![1.0, 0.0, 0.0], None).unwrap(); - let hits = db.ask_in_namespace("ns_a", vec![1.0, 0.0, 0.0], 10, None).unwrap(); + let hits = db.ask_in_collection("ns_a", vec![1.0, 0.0, 0.0], 10, None).unwrap(); assert!(!hits.is_empty(), "should find memories in ns_a"); assert!( hits.iter().all(|h| h.id == id_a), @@ -757,12 +757,12 @@ mod tests { db.compact().expect("compact must not fail"); } - // ----- ask_in_namespace: sparse namespace over-fetch regression ----- + // ----- ask_in_collection: sparse collection over-fetch regression ----- #[test] - fn test_ask_in_namespace_finds_entry_in_sparse_namespace() { - // Regression: before the 4× fix, ask_in_namespace returned empty results when the - // target namespace had far fewer entries than top_k * candidate_multiplier entries globally. + fn test_ask_in_collection_finds_entry_in_sparse_collection() { + // Regression: before the 4× fix, ask_in_collection returned empty results when the + // target collection had far fewer entries than top_k * candidate_multiplier entries globally. 
let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); @@ -770,14 +770,14 @@ mod tests { // Insert 10 entries in ns_majority to fill the global index. for i in 0..10u32 { let v = vec![i as f32 / 10.0, 1.0 - i as f32 / 10.0, 0.0]; - db.remember_in_namespace("ns_majority", v, None).unwrap(); + db.remember_in_collection("ns_majority", v, None).unwrap(); } // Insert 2 entries in ns_sparse. - let id_a = db.remember_in_namespace("ns_sparse", vec![1.0, 0.0, 0.0], None).unwrap(); - let id_b = db.remember_in_namespace("ns_sparse", vec![0.9, 0.1, 0.0], None).unwrap(); + let id_a = db.remember_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], None).unwrap(); + let id_b = db.remember_in_collection("ns_sparse", vec![0.9, 0.1, 0.0], None).unwrap(); // Ask for top-2 in ns_sparse — both must be returned. - let hits = db.ask_in_namespace("ns_sparse", vec![1.0, 0.0, 0.0], 2, None).unwrap(); + let hits = db.ask_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], 2, None).unwrap(); let hit_ids: Vec = hits.iter().map(|h| h.id).collect(); assert!( hit_ids.contains(&id_a), diff --git a/crates/cortexadb-core/src/index/graph.rs b/crates/cortexadb-core/src/index/graph.rs index eaf2f0a..599910c 100644 --- a/crates/cortexadb-core/src/index/graph.rs +++ b/crates/cortexadb-core/src/index/graph.rs @@ -34,20 +34,20 @@ impl GraphIndex { Self::bfs_internal(state_machine, start, max_hops, None) } - pub fn bfs_in_namespace( + pub fn bfs_in_collection( state_machine: &StateMachine, start: MemoryId, max_hops: usize, - namespace: &str, + collection: &str, ) -> Result> { - Self::bfs_internal(state_machine, start, max_hops, Some(namespace)) + Self::bfs_internal(state_machine, start, max_hops, Some(collection)) } fn bfs_internal( state_machine: &StateMachine, start: MemoryId, max_hops: usize, - namespace: Option<&str>, + collection: Option<&str>, ) -> Result> { if max_hops == 0 { return Err(GraphError::InvalidMaxHops(max_hops)); @@ 
-56,8 +56,8 @@ impl GraphIndex { // Verify start memory exists let start_entry = state_machine.get_memory(start).map_err(|_| GraphError::MemoryNotFound(start))?; - if let Some(ns) = namespace { - if start_entry.namespace != ns { + if let Some(col) = collection { + if start_entry.collection != col { return Ok(HashMap::new()); } } @@ -77,12 +77,12 @@ impl GraphIndex { // Get neighbors of current memory if let Ok(neighbors) = state_machine.get_neighbors(current) { for (neighbor_id, _relation) in neighbors { - if let Some(ns) = namespace { - let in_namespace = state_machine - .namespace_of(neighbor_id) - .map(|v| v == ns) + if let Some(col) = collection { + let in_collection = state_machine + .collection_of(neighbor_id) + .map(|v| v == col) .unwrap_or(false); - if !in_namespace { + if !in_collection { continue; } } @@ -251,13 +251,13 @@ impl GraphIndex { Ok(ids) } - pub fn get_reachable_in_namespace( + pub fn get_reachable_in_collection( state_machine: &StateMachine, start: MemoryId, max_hops: usize, - namespace: &str, + collection: &str, ) -> Result> { - let visited = Self::bfs_in_namespace(state_machine, start, max_hops, namespace)?; + let visited = Self::bfs_in_collection(state_machine, start, max_hops, collection)?; let mut ids: Vec = visited.keys().copied().collect(); ids.sort(); Ok(ids) @@ -446,7 +446,7 @@ mod tests { } #[test] - fn test_bfs_in_namespace_filters_neighbors() { + fn test_bfs_in_collection_filters_neighbors() { let mut sm = StateMachine::new(); sm.insert_memory(MemoryEntry::new(MemoryId(1), "agent1".to_string(), b"a".to_vec(), 1000)) .unwrap(); @@ -456,10 +456,10 @@ mod tests { .unwrap(); sm.add_edge(MemoryId(1), MemoryId(2), "ok".to_string()).unwrap(); - // Cross namespace should be rejected by state machine, so no leakage via edges. + // Cross collection should be rejected by state machine, so no leakage via edges. 
assert!(sm.add_edge(MemoryId(2), MemoryId(3), "bad".to_string()).is_err()); - let scoped = GraphIndex::bfs_in_namespace(&sm, MemoryId(1), 3, "agent1").unwrap(); + let scoped = GraphIndex::bfs_in_collection(&sm, MemoryId(1), 3, "agent1").unwrap(); assert!(scoped.contains_key(&MemoryId(1))); assert!(scoped.contains_key(&MemoryId(2))); assert!(!scoped.contains_key(&MemoryId(3))); diff --git a/crates/cortexadb-core/src/index/vector.rs b/crates/cortexadb-core/src/index/vector.rs index dc32524..0338abc 100644 --- a/crates/cortexadb-core/src/index/vector.rs +++ b/crates/cortexadb-core/src/index/vector.rs @@ -22,7 +22,7 @@ pub enum VectorError { pub type Result = std::result::Result; -const DEFAULT_NAMESPACE: &str = "__global__"; +const DEFAULT_COLLECTION: &str = "__global__"; #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum VectorBackendMode { @@ -47,8 +47,8 @@ trait AnnCandidateProvider: Send + Sync + std::fmt::Debug { &self, query: &[f32], ann_k: usize, - namespace: Option<&str>, - partitions: &HashMap, + collection: Option<&str>, + partitions: &HashMap, ) -> Result>; } @@ -64,16 +64,16 @@ impl AnnCandidateProvider for PrefixAnnCandidateProvider { &self, query: &[f32], ann_k: usize, - namespace: Option<&str>, - partitions: &HashMap, + collection: Option<&str>, + partitions: &HashMap, ) -> Result> { let approx_dims = query.len().clamp(1, 8); let query_prefix = &query[..approx_dims]; let query_mag = magnitude(query_prefix)?; let mut approx_scored = Vec::new(); - let iter: Box> = match namespace { - Some(ns) => match partitions.get_key_value(ns) { + let iter: Box> = match collection { + Some(col) => match partitions.get_key_value(col) { Some(one) => Box::new(std::iter::once(one)), None => Box::new(std::iter::empty()), }, @@ -91,7 +91,7 @@ impl AnnCandidateProvider for PrefixAnnCandidateProvider { } if approx_scored.is_empty() { - if namespace.is_some() { + if collection.is_some() { return Ok(Vec::new()); } return Err(VectorError::NoEmbeddings); @@ -118,10 
+118,10 @@ impl AnnCandidateProvider for HnswReadyAnnCandidateProvider { &self, query: &[f32], ann_k: usize, - namespace: Option<&str>, - partitions: &HashMap, + collection: Option<&str>, + partitions: &HashMap, ) -> Result> { - PrefixAnnCandidateProvider.candidates(query, ann_k, namespace, partitions) + PrefixAnnCandidateProvider.candidates(query, ann_k, collection, partitions) } } @@ -152,7 +152,7 @@ impl VectorSearchBackend for AnnBackend { } #[derive(Debug, Clone, Default)] -struct NamespacePartition { +struct CollectionPartition { embeddings: HashMap>, tombstones: HashSet, } @@ -164,10 +164,10 @@ struct NamespacePartition { /// Supports both exact (brute-force) and HNSW approximate search. #[derive(Clone)] pub struct VectorIndex { - /// namespace -> partition - partitions: HashMap, - /// Global lookup for ID -> namespace - id_to_namespace: HashMap, + /// collection -> partition + partitions: HashMap, + /// Global lookup for ID -> collection + id_to_collection: HashMap, /// Dimension of embeddings (typically 384, 768, 1536) vector_dimension: usize, /// Search backend mode @@ -196,7 +196,7 @@ impl VectorIndex { pub fn new(vector_dimension: usize) -> Self { Self { partitions: HashMap::new(), - id_to_namespace: HashMap::new(), + id_to_collection: HashMap::new(), vector_dimension, backend_mode: VectorBackendMode::Exact, backend: Arc::new(ExactBackend), @@ -226,7 +226,7 @@ impl VectorIndex { }; Ok(Self { partitions: HashMap::new(), - id_to_namespace: HashMap::new(), + id_to_collection: HashMap::new(), vector_dimension, backend_mode: VectorBackendMode::Exact, backend: Arc::new(ExactBackend), @@ -293,12 +293,12 @@ impl VectorIndex { /// Add or update embedding for a memory pub fn index(&mut self, id: MemoryId, embedding: Vec) -> Result<()> { - self.index_in_namespace(DEFAULT_NAMESPACE, id, embedding) + self.index_in_collection(DEFAULT_COLLECTION, id, embedding) } - pub fn index_in_namespace>( + pub fn index_in_collection>( &mut self, - namespace: S, + collection: S, 
id: MemoryId, embedding: Vec, ) -> Result<()> { @@ -309,20 +309,20 @@ impl VectorIndex { }); } - let namespace = namespace.as_ref().to_string(); - if let Some(previous_ns) = self.id_to_namespace.get(&id).cloned() { - if previous_ns != namespace { - if let Some(partition) = self.partitions.get_mut(&previous_ns) { + let collection = collection.as_ref().to_string(); + if let Some(previous_col) = self.id_to_collection.get(&id).cloned() { + if previous_col != collection { + if let Some(partition) = self.partitions.get_mut(&previous_col) { partition.embeddings.remove(&id); partition.tombstones.remove(&id); } } } - let partition = self.partitions.entry(namespace.clone()).or_default(); + let partition = self.partitions.entry(collection.clone()).or_default(); partition.tombstones.remove(&id); partition.embeddings.insert(id, embedding.clone()); - self.id_to_namespace.insert(id, namespace); + self.id_to_collection.insert(id, collection); // Also add to HNSW backend if enabled if let Some(ref hnsw) = self.hnsw_backend { @@ -334,9 +334,9 @@ impl VectorIndex { /// Remove embedding for a memory pub fn remove(&mut self, id: MemoryId) -> Result<()> { - if let Some(namespace) = self.id_to_namespace.get(&id).cloned() { + if let Some(collection) = self.id_to_collection.get(&id).cloned() { let mode = self.backend_mode; - if let Some(partition) = self.partitions.get_mut(&namespace) { + if let Some(partition) = self.partitions.get_mut(&collection) { match mode { VectorBackendMode::Exact => { partition.embeddings.remove(&id); @@ -355,10 +355,10 @@ impl VectorIndex { } } if partition.embeddings.is_empty() { - self.partitions.remove(&namespace); + self.partitions.remove(&collection); } } - self.id_to_namespace.remove(&id); + self.id_to_collection.remove(&id); // Also remove from HNSW backend if enabled if let Some(ref hnsw) = self.hnsw_backend { @@ -370,10 +370,10 @@ impl VectorIndex { /// Check if memory has embedding pub fn has(&self, id: MemoryId) -> bool { - let Some(namespace) = 
self.id_to_namespace.get(&id) else { + let Some(collection) = self.id_to_collection.get(&id) else { return false; }; - let Some(partition) = self.partitions.get(namespace) else { + let Some(partition) = self.partitions.get(collection) else { return false; }; partition.embeddings.contains_key(&id) && !partition.tombstones.contains(&id) @@ -410,7 +410,7 @@ impl VectorIndex { &self, query: &[f32], top_k: usize, - namespace: Option<&str>, + collection: Option<&str>, use_parallel: bool, ann_candidate_multiplier: usize, ) -> Result> { @@ -447,13 +447,13 @@ impl VectorIndex { // Default: exact search match self.backend.mode() { VectorBackendMode::Exact => { - self.search_exact_scoped(query, top_k, namespace, use_parallel) + self.search_exact_scoped(query, top_k, collection, use_parallel) } VectorBackendMode::Ann { .. } => { let ann_multiplier = ann_candidate_multiplier.max(self.backend.ann_multiplier_hint()).max(1); let ann_k = top_k.saturating_mul(ann_multiplier); - let approx = self.search_approx_candidates(query, ann_k, namespace)?; + let approx = self.search_approx_candidates(query, ann_k, collection)?; if approx.is_empty() { return Ok(Vec::new()); } @@ -497,8 +497,8 @@ impl VectorIndex { let mut results: Vec<(MemoryId, f32)> = candidate_ids .iter() .filter_map(|id| { - let namespace = self.id_to_namespace.get(id)?; - let partition = self.partitions.get(namespace)?; + let collection = self.id_to_collection.get(id)?; + let partition = self.partitions.get(collection)?; if partition.tombstones.contains(id) { return None; } @@ -520,10 +520,10 @@ impl VectorIndex { /// Get all indexed memory IDs pub fn indexed_ids(&self) -> Vec { - self.id_to_namespace.keys().copied().filter(|id| self.has(*id)).collect() + self.id_to_collection.keys().copied().filter(|id| self.has(*id)).collect() } - fn compact_partition(partition: &mut NamespacePartition) { + fn compact_partition(partition: &mut CollectionPartition) { if partition.tombstones.is_empty() { return; } @@ -536,19 +536,19 @@ 
impl VectorIndex { /// Compact the vector index by permanently removing tombstones from exact partitions /// and completely rebuilding the approximate nearest neighbor (HNSW) index to free memory. pub fn compact(&mut self) -> Result { - let mut empty_namespaces = Vec::new(); + let mut empty_collections = Vec::new(); // 1. Compact exact partitions - for (ns, partition) in &mut self.partitions { + for (col, partition) in &mut self.partitions { Self::compact_partition(partition); if partition.embeddings.is_empty() { - empty_namespaces.push(ns.clone()); + empty_collections.push(col.clone()); } } // 2. Remove empty partitions - for ns in empty_namespaces { - self.partitions.remove(&ns); + for col in empty_collections { + self.partitions.remove(&col); } // 3. Rebuild HNSW backend if enabled @@ -575,11 +575,11 @@ impl VectorIndex { fn partition_iter<'a>( &'a self, - namespace: Option<&str>, - ) -> Box + 'a> { - match namespace { - Some(ns) => { - if let Some(partition) = self.partitions.get_key_value(ns) { + collection: Option<&str>, + ) -> Box + 'a> { + match collection { + Some(col) => { + if let Some(partition) = self.partitions.get_key_value(col) { Box::new(std::iter::once(partition)) } else { Box::new(std::iter::empty()) @@ -593,13 +593,13 @@ impl VectorIndex { &self, query: &[f32], top_k: usize, - namespace: Option<&str>, + collection: Option<&str>, use_parallel: bool, ) -> Result> { let query_magnitude = magnitude(query)?; let mut results: Vec<(MemoryId, f32)> = Vec::new(); - for (_ns, partition) in self.partition_iter(namespace) { + for (_col, partition) in self.partition_iter(collection) { let iter_results: Vec<(MemoryId, f32)> = if use_parallel { partition .embeddings @@ -627,7 +627,7 @@ impl VectorIndex { } if results.is_empty() { - if namespace.is_some() { + if collection.is_some() { return Ok(Vec::new()); } return Err(VectorError::NoEmbeddings); @@ -643,9 +643,9 @@ impl VectorIndex { &self, query: &[f32], ann_k: usize, - namespace: Option<&str>, + collection: 
Option<&str>, ) -> Result> { - self.ann_provider.candidates(query, ann_k, namespace, &self.partitions) + self.ann_provider.candidates(query, ann_k, collection, &self.partitions) } fn rerank_exact( @@ -657,10 +657,10 @@ impl VectorIndex { let query_mag = magnitude(query)?; let mut out = Vec::new(); for id in candidate_ids { - let Some(ns) = self.id_to_namespace.get(id) else { + let Some(col) = self.id_to_collection.get(id) else { continue; }; - let Some(partition) = self.partitions.get(ns) else { + let Some(partition) = self.partitions.get(col) else { continue; }; if partition.tombstones.contains(id) { @@ -969,10 +969,10 @@ mod tests { } #[test] - fn test_namespace_partition_search_scope() { + fn test_collection_partition_search_scope() { let mut index = VectorIndex::new(3); - index.index_in_namespace("agent1", MemoryId(1), vec![1.0, 0.0, 0.0]).unwrap(); - index.index_in_namespace("agent2", MemoryId(2), vec![1.0, 0.0, 0.0]).unwrap(); + index.index_in_collection("agent1", MemoryId(1), vec![1.0, 0.0, 0.0]).unwrap(); + index.index_in_collection("agent2", MemoryId(2), vec![1.0, 0.0, 0.0]).unwrap(); let scoped = index.search_scoped(&[1.0, 0.0, 0.0], 10, Some("agent1"), false, 1).unwrap(); assert_eq!(scoped.len(), 1); @@ -985,7 +985,7 @@ mod tests { index.set_backend_mode(VectorBackendMode::Ann { ann_search_multiplier: 7 }); for i in 0..30u64 { let emb = if i == 29 { vec![1.0, 0.0, 0.0] } else { vec![0.6, 0.8, 0.0] }; - index.index_in_namespace("agent1", MemoryId(i), emb).unwrap(); + index.index_in_collection("agent1", MemoryId(i), emb).unwrap(); } let results = index.search_scoped(&[1.0, 0.0, 0.0], 3, Some("agent1"), false, 7).unwrap(); @@ -999,7 +999,7 @@ mod tests { index.set_backend_mode(VectorBackendMode::Ann { ann_search_multiplier: 7 }); for i in 0..10u64 { - index.index_in_namespace("agent1", MemoryId(i), vec![1.0, 0.0, 0.0]).unwrap(); + index.index_in_collection("agent1", MemoryId(i), vec![1.0, 0.0, 0.0]).unwrap(); } assert_eq!(index.len(), 10); diff --git 
a/crates/cortexadb-core/src/query/executor.rs b/crates/cortexadb-core/src/query/executor.rs index a60d387..4098279 100644 --- a/crates/cortexadb-core/src/query/executor.rs +++ b/crates/cortexadb-core/src/query/executor.rs @@ -169,7 +169,7 @@ impl QueryExecutor { let vector_results = index_layer.vector.search_scoped( &query_embedding, candidate_k, - options.namespace.as_deref(), + options.collection.as_deref(), plan.use_parallel, plan.ann_candidate_multiplier, )?; @@ -198,8 +198,8 @@ impl QueryExecutor { if expansion.hops > 0 { let mut expanded_ids = HashSet::new(); for id in candidate_scores.keys().copied() { - let reachable = if let Some(ns) = options.namespace.as_deref() { - GraphIndex::bfs_in_namespace(state_machine, id, expansion.hops, ns)? + let reachable = if let Some(col) = options.collection.as_deref() { + GraphIndex::bfs_in_collection(state_machine, id, expansion.hops, col)? } else { GraphIndex::bfs(state_machine, id, expansion.hops)? }; @@ -207,7 +207,7 @@ impl QueryExecutor { if matches_filters( state_machine, neighbor, - options.namespace.as_deref(), + options.collection.as_deref(), None, options.metadata_filter.as_ref(), ) { @@ -261,7 +261,7 @@ impl QueryExecutor { fn matches_filters( state_machine: &StateMachine, id: MemoryId, - namespace: Option<&str>, + collection: Option<&str>, time_range: Option<(u64, u64)>, metadata_filter: Option<&HashMap>, ) -> bool { @@ -270,8 +270,8 @@ fn matches_filters( Err(_) => return false, }; - if let Some(ns) = namespace { - if entry.namespace != ns { + if let Some(col) = collection { + if entry.collection != col { return false; } } diff --git a/crates/cortexadb-core/src/query/hybrid.rs b/crates/cortexadb-core/src/query/hybrid.rs index 0fc92ce..f1c491a 100644 --- a/crates/cortexadb-core/src/query/hybrid.rs +++ b/crates/cortexadb-core/src/query/hybrid.rs @@ -105,7 +105,7 @@ impl GraphExpansionOptions { #[derive(Debug, Clone)] pub struct QueryOptions { pub top_k: usize, - pub namespace: Option, + pub collection: Option, 
pub time_range: Option<(u64, u64)>, pub graph_expansion: Option, pub candidate_multiplier: usize, @@ -124,7 +124,7 @@ impl Default for QueryOptions { fn default() -> Self { Self { top_k: 10, - namespace: None, + collection: None, time_range: None, // Hybrid-first default: expand one hop when graph signal exists. graph_expansion: Some(GraphExpansionOptions::new(1)), @@ -165,10 +165,10 @@ impl<'a> HybridQueryEngine<'a> { &self, query_text: &str, top_k: usize, - namespace: Option<&str>, + collection: Option<&str>, ) -> Result> { let mut options = QueryOptions::with_top_k(top_k); - options.namespace = namespace.map(|ns| ns.to_string()); + options.collection = collection.map(|ns| ns.to_string()); self.query_with_options(query_text, options) } @@ -198,7 +198,7 @@ impl<'a> HybridQueryEngine<'a> { let vector_results = self.index_layer.vector.search_scoped( &query_embedding, candidate_k, - options.namespace.as_deref(), + options.collection.as_deref(), false, ann_multiplier, )?; @@ -216,15 +216,15 @@ impl<'a> HybridQueryEngine<'a> { let mut expanded_ids = HashSet::new(); let base_ids: Vec = candidate_scores.keys().copied().collect(); for id in base_ids { - let reachable = if let Some(ns) = options.namespace.as_deref() { - GraphIndex::bfs_in_namespace(self.state_machine, id, expansion.hops, ns)? + let reachable = if let Some(col) = options.collection.as_deref() { + GraphIndex::bfs_in_collection(self.state_machine, id, expansion.hops, col)? } else { GraphIndex::bfs(self.state_machine, id, expansion.hops)? 
}; for reachable_id in reachable.keys().copied() { if self.matches_filters( reachable_id, - options.namespace.as_deref(), + options.collection.as_deref(), None, options.metadata_filter.as_ref(), ) { @@ -305,7 +305,7 @@ impl<'a> HybridQueryEngine<'a> { fn matches_filters( &self, id: MemoryId, - namespace: Option<&str>, + collection: Option<&str>, time_range: Option<(u64, u64)>, metadata_filter: Option<&HashMap>, ) -> bool { @@ -314,8 +314,8 @@ impl<'a> HybridQueryEngine<'a> { Err(_) => return false, }; - if let Some(ns) = namespace { - if entry.namespace != ns { + if let Some(col) = collection { + if entry.collection != col { return false; } } @@ -370,7 +370,7 @@ mod tests { for entry in [&a, &b, &c] { layer .vector_index_mut() - .index_in_namespace(&entry.namespace, entry.id, entry.embedding.clone().unwrap()) + .index_in_collection(&entry.collection, entry.id, entry.embedding.clone().unwrap()) .unwrap(); } sm.insert_memory(a).unwrap(); @@ -383,7 +383,7 @@ mod tests { } #[test] - fn test_query_with_namespace_filter() { + fn test_query_with_collection_filter() { let (sm, layer, embedder) = build_engine(); let engine = HybridQueryEngine::new(&sm, &layer, &embedder); @@ -409,7 +409,7 @@ mod tests { let engine = HybridQueryEngine::new(&sm, &layer, &embedder); let mut options = QueryOptions::with_top_k(10); - options.namespace = Some("agent1".to_string()); + options.collection = Some("agent1".to_string()); options.time_range = Some((1000, 1000)); // only id=1 from vector base filter options.graph_expansion = Some(GraphExpansionOptions::new(1)); // expands to id=2 diff --git a/crates/cortexadb-core/src/storage/segment.rs b/crates/cortexadb-core/src/storage/segment.rs index 412f3c6..34f5d07 100644 --- a/crates/cortexadb-core/src/storage/segment.rs +++ b/crates/cortexadb-core/src/storage/segment.rs @@ -467,7 +467,7 @@ mod tests { let read_entry = storage.read_entry(MemoryId(1)).unwrap(); assert_eq!(read_entry.id, MemoryId(1)); - assert_eq!(read_entry.namespace, "test"); + 
assert_eq!(read_entry.collection, "test"); } #[test] diff --git a/crates/cortexadb-core/src/storage/serialization.rs b/crates/cortexadb-core/src/storage/serialization.rs index 1b62745..829032a 100644 --- a/crates/cortexadb-core/src/storage/serialization.rs +++ b/crates/cortexadb-core/src/storage/serialization.rs @@ -55,6 +55,6 @@ mod tests { let deserialized: MemoryEntry = deserialize_versioned(&legacy_serialized).unwrap(); assert_eq!(deserialized.id, MemoryId(123)); - assert_eq!(deserialized.namespace, "old"); + assert_eq!(deserialized.collection, "old"); } } diff --git a/crates/cortexadb-core/src/store.rs b/crates/cortexadb-core/src/store.rs index d6e5a43..ad7d4a6 100644 --- a/crates/cortexadb-core/src/store.rs +++ b/crates/cortexadb-core/src/store.rs @@ -595,8 +595,8 @@ impl CortexaDBStore { // Update vector index match effective.embedding { Some(embedding) => { - writer.indexes.vector_index_mut().index_in_namespace( - &effective.namespace, + writer.indexes.vector_index_mut().index_in_collection( + &effective.collection, effective.id, embedding, )?; @@ -872,8 +872,8 @@ impl CortexaDBStore { writer.engine.execute_command_unsynced(Command::InsertMemory(entry.clone()))? 
}; match entry.embedding { - Some(embedding) => writer.indexes.vector_index_mut().index_in_namespace( - &entry.namespace, + Some(embedding) => writer.indexes.vector_index_mut().index_in_collection( + &entry.collection, entry.id, embedding, )?, @@ -965,8 +965,8 @@ impl CortexaDBStore { for entry in state_machine.all_memories() { if let Some(embedding) = entry.embedding.clone() { if !existing_ids.contains(&entry.id) { - indexes.vector_index_mut().index_in_namespace( - &entry.namespace, + indexes.vector_index_mut().index_in_collection( + &entry.collection, entry.id, embedding, )?; @@ -1102,7 +1102,7 @@ mod tests { store.insert_memory(b).unwrap(); let mut options = QueryOptions::with_top_k(2); - options.namespace = Some("agent1".to_string()); + options.collection = Some("agent1".to_string()); let out = store.query("hello", options, &TestEmbedder).unwrap(); assert_eq!(out.hits.len(), 2); } @@ -1141,7 +1141,7 @@ mod tests { assert_eq!(recovered.indexed_embeddings(), 1); let mut options = QueryOptions::with_top_k(1); - options.namespace = Some("agent1".to_string()); + options.collection = Some("agent1".to_string()); let out = recovered.query("hello", options, &TestEmbedder).unwrap(); assert_eq!(out.hits.len(), 1); assert_eq!(out.hits[0].id, MemoryId(77)); @@ -1246,7 +1246,7 @@ mod tests { let snapshot = store.snapshot(); let mut options = QueryOptions::with_top_k(10); - options.namespace = Some("agent1".to_string()); + options.collection = Some("agent1".to_string()); let plan = QueryPlanner::plan(options, snapshot.indexes().vector.len()); let snapshot_for_query = Arc::clone(&snapshot); diff --git a/crates/cortexadb-core/tests/integration.rs b/crates/cortexadb-core/tests/integration.rs index e515569..9eef42d 100644 --- a/crates/cortexadb-core/tests/integration.rs +++ b/crates/cortexadb-core/tests/integration.rs @@ -209,11 +209,11 @@ fn test_graph_edges_persist_across_recovery() { } // --------------------------------------------------------------------------- -// Namespace 
isolation +// Collection isolation // --------------------------------------------------------------------------- #[test] -fn test_namespace_isolation_persists() { +fn test_collection_isolation_persists() { let dir = TempDir::new().unwrap(); let path = dir.path().join("db"); @@ -221,13 +221,13 @@ fn test_namespace_isolation_persists() { let id_b: u64; { let db = open_db(&path); - id_a = db.remember_in_namespace("agent_a", vec![1.0, 0.0, 0.0], None).unwrap(); - id_b = db.remember_in_namespace("agent_b", vec![1.0, 0.0, 0.0], None).unwrap(); + id_a = db.remember_in_collection("agent_a", vec![1.0, 0.0, 0.0], None).unwrap(); + id_b = db.remember_in_collection("agent_b", vec![1.0, 0.0, 0.0], None).unwrap(); } let db = open_db(&path); - assert_eq!(db.get_memory(id_a).unwrap().namespace, "agent_a"); - assert_eq!(db.get_memory(id_b).unwrap().namespace, "agent_b"); + assert_eq!(db.get_memory(id_a).unwrap().collection, "agent_a"); + assert_eq!(db.get_memory(id_b).unwrap().collection, "agent_b"); } // --------------------------------------------------------------------------- diff --git a/crates/cortexadb-py/.gitignore b/crates/cortexadb-py/.gitignore index f468dd0..75b0181 100644 --- a/crates/cortexadb-py/.gitignore +++ b/crates/cortexadb-py/.gitignore @@ -4,3 +4,6 @@ *.dylib *.pyd __pycache__ +*.dSYM +*.whl +*.egg-info \ No newline at end of file diff --git a/crates/cortexadb-py/cortexadb/chunker.py b/crates/cortexadb-py/cortexadb/chunker.py index 85e663c..fbd3233 100644 --- a/crates/cortexadb-py/cortexadb/chunker.py +++ b/crates/cortexadb-py/cortexadb/chunker.py @@ -10,8 +10,7 @@ """ from __future__ import annotations -from typing import List, Dict, Any, Optional -import typing as t +from typing import List, Dict, Any from . 
import _cortexadb diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py index 82cc770..030fc63 100644 --- a/crates/cortexadb-py/cortexadb/client.py +++ b/crates/cortexadb-py/cortexadb/client.py @@ -4,16 +4,12 @@ CortexaDBError, Hit, Memory, - Stats, BatchRecord, - CortexaDBNotFoundError, CortexaDBConfigError, - CortexaDBIOError, ) from . import _cortexadb from .embedder import Embedder from .chunker import chunk -from .loader import load_file, get_file_metadata from .replay import ReplayWriter, ReplayReader @@ -139,6 +135,7 @@ def __repr__(self) -> str: return f"Collection(name={self.name!r}, mode={'readonly' if self._readonly else 'readwrite'})" +# Deprecated Alias Namespace = Collection @@ -185,7 +182,6 @@ def open(cls, path: str, **kwargs) -> "CortexaDB": @classmethod def replay(cls, log_path: str, db_path: str, **kwargs) -> "CortexaDB": - from .replay import ReplayReader try: reader = ReplayReader(log_path) except FileNotFoundError as e: @@ -208,7 +204,7 @@ def replay(cls, log_path: str, db_path: str, **kwargs) -> "CortexaDB": text=op.get("text"), vector=op.get("embedding"), metadata=op.get("metadata"), - collection=op.get("collection") or op.get("namespace", "default") + collection=op.get("collection") or "default" ) id_map[op.get("id")] = new_id report["exported"] += 1 @@ -224,7 +220,7 @@ def replay(cls, log_path: str, db_path: str, **kwargs) -> "CortexaDB": report["op_counts"]["unknown"] = report["op_counts"].get("unknown", 0) + 1 if strict: raise CortexaDBError(f"unknown replay op: {op_type}") report["skipped"] += 1 - except Exception as e: + except Exception: if strict: raise report["skipped"] += 1 report["failed"] += 1 @@ -236,11 +232,11 @@ def collection(self, name: str, **kwargs) -> Collection: """Access a scoped collection.""" return Collection(self, name, **kwargs) - def namespace(self, *a, **k): return self.collection(*a, **k) + def add(self, text=None, vector=None, metadata=None, collection=None, **kwargs) -> 
int: """Add a memory.""" - collection = collection or kwargs.get("collection") or kwargs.get("namespace", "default") + collection = collection or kwargs.get("collection") or "default" vector = vector or kwargs.get("vector") or kwargs.get("embedding") vec = self._resolve_embedding(text, vector) content = text or "" @@ -259,7 +255,7 @@ def search( """Core search implementation.""" limit = limit or kwargs.get("limit") or kwargs.get("top_k", 5) vector = vector or kwargs.get("vector") or kwargs.get("embedding") or kwargs.get("query_vector") - collections = collections or kwargs.get("collections") or kwargs.get("namespaces") + collections = collections or kwargs.get("collections") or kwargs.get("collection") vec = self._resolve_embedding(query, vector) if collections is None: @@ -353,7 +349,7 @@ def add_batch(self, records: t.List[t.Dict]) -> t.List[int]: """High-performance batch add.""" facade_records = [ BatchRecord( - collection=r.get("collection") or r.get("namespace") or "default", + collection=r.get("collection") or "default", content=r.get("text") or "", embedding=self._resolve_embedding(r.get("text"), r.get("vector")), metadata=r.get("metadata") @@ -373,7 +369,7 @@ def ingest(self, text: str, **kwargs) -> t.List[int]: "text": c["text"], "vector": vec, "metadata": {** (kwargs.get("metadata") or {}), **(c.get("metadata") or {})}, - "collection": kwargs.get("collection") or kwargs.get("namespace", "default") + "collection": kwargs.get("collection") or "default" } for c, vec in zip(chunks, embeddings)] return self.add_batch(records) diff --git a/crates/cortexadb-py/cortexadb/loader.py b/crates/cortexadb-py/cortexadb/loader.py index f1ea7c3..75fe4bf 100644 --- a/crates/cortexadb-py/cortexadb/loader.py +++ b/crates/cortexadb-py/cortexadb/loader.py @@ -15,9 +15,8 @@ from __future__ import annotations from pathlib import Path -from typing import Optional, Dict, Any +from typing import Dict, Any -import typing as t def load_file(path: str) -> str: diff --git 
a/crates/cortexadb-py/cortexadb/providers/ollama.py b/crates/cortexadb-py/cortexadb/providers/ollama.py index 43a450c..3c79fba 100644 --- a/crates/cortexadb-py/cortexadb/providers/ollama.py +++ b/crates/cortexadb-py/cortexadb/providers/ollama.py @@ -20,7 +20,7 @@ from __future__ import annotations -from typing import List, Optional +from typing import List from ..embedder import Embedder diff --git a/crates/cortexadb-py/src/lib.rs b/crates/cortexadb-py/src/lib.rs index ad044c0..549f586 100644 --- a/crates/cortexadb-py/src/lib.rs +++ b/crates/cortexadb-py/src/lib.rs @@ -209,8 +209,6 @@ struct PyMemory { #[pyo3(get)] collection: String, #[pyo3(get)] - namespace: String, - #[pyo3(get)] created_at: u64, #[pyo3(get)] importance: f32, @@ -403,7 +401,7 @@ impl PyCortexaDB { let id = py .allow_threads(|| { if content.is_empty() { - self.inner.remember_in_namespace(&collection, embedding, metadata) + self.inner.remember_in_collection(&collection, embedding, metadata) } else { self.inner.remember_with_content( &collection, @@ -441,7 +439,7 @@ impl PyCortexaDB { let facade_records: Vec = records .into_iter() .map(|r| facade::BatchRecord { - namespace: r.collection, + collection: r.collection, content: r.content, embedding: r.embedding, metadata: r.metadata, @@ -524,7 +522,7 @@ impl PyCortexaDB { } let results = py - .allow_threads(|| self.inner.ask_in_namespace(collection, embedding, top_k, filter)) + .allow_threads(|| self.inner.ask_in_collection(collection, embedding, top_k, filter)) .map_err(map_cortexadb_err)?; Ok(results.into_iter().map(|m| m.into()).collect::>()) @@ -546,8 +544,7 @@ impl PyCortexaDB { Ok(PyMemory { id: entry.id, - collection: entry.namespace.clone(), - namespace: entry.namespace.clone(), + collection: entry.collection.clone(), created_at: entry.created_at, importance: entry.importance, content: entry.content.clone(), diff --git a/crates/cortexadb-py/test_smoke.py b/crates/cortexadb-py/test_smoke.py index fad7a06..2b3c48e 100644 --- 
a/crates/cortexadb-py/test_smoke.py +++ b/crates/cortexadb-py/test_smoke.py @@ -47,19 +47,19 @@ def test_cortexadb_basic_flow(): db.checkpoint() -def test_cortexadb_namespaces(): +def test_cortexadb_collections(): db = CortexaDB.open(DB_PATH, dimension=3) - agent_a = db.namespace("agent_a") - agent_b = db.namespace("agent_b") + col_a = db.collection("agent_a") + col_b = db.collection("agent_b") - id_a = agent_a.remember("I am Agent A", embedding=[1.0, 0.0, 0.0]) - agent_b.remember("I am Agent B", embedding=[0.0, 1.0, 0.0]) + id_a = col_a.remember("I am Agent A", embedding=[1.0, 0.0, 0.0]) + col_b.remember("I am Agent B", embedding=[0.0, 1.0, 0.0]) assert db.get(id_a).collection == "agent_a" - # Test ask filters by namespace using the wrapper - hits_a = agent_a.search("Agent A", embedding=[1.0, 0.0, 0.0]) + # Test search filters by collection using the wrapper + hits_a = col_a.search("Agent A", embedding=[1.0, 0.0, 0.0]) assert len(hits_a) == 1 assert hits_a[0].id == id_a @@ -154,40 +154,40 @@ def test_ingest_document_requires_embedder(): with pytest.raises(CortexaDBError, match="ingest_document"): db.ingest("some text") -def test_namespace_auto_embed(): +def test_collection_auto_embed(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) - ns = db.namespace("agent_a") - mid = ns.remember("I am agent A") + col = db.collection("agent_a") + mid = col.remember("I am agent A") assert db.get(mid).collection == "agent_a" - hits = ns.search("agent A") + hits = col.search("agent A") assert any(h.id == mid for h in hits) # Namespace Model -def test_namespace_isolation(): - """Memories in namespace A should not appear in namespace B results.""" +def test_collection_isolation(): + """Memories in collection A should not appear in collection B results.""" emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) - agent_a = db.namespace("agent_a") - agent_b = db.namespace("agent_b") + col_a = db.collection("agent_a") + col_b = 
db.collection("agent_b") - mid_a = agent_a.remember("I am agent A, secret info") - mid_b = agent_b.remember("I am agent B, different info") + mid_a = col_a.remember("I am agent A, secret info") + mid_b = col_b.remember("I am agent B, different info") - hits_a = agent_a.search("agent A", top_k=10) - hits_b = agent_b.search("agent B", top_k=10) + hits_a = col_a.search("agent A", top_k=10) + hits_b = col_b.search("agent B", top_k=10) a_ids = {h.id for h in hits_a} b_ids = {h.id for h in hits_b} - assert mid_a in a_ids, "Agent A memory not found in agent_a namespace" - assert mid_b not in a_ids, "Agent B memory leaked into agent_a namespace" - assert mid_b in b_ids, "Agent B memory not found in agent_b namespace" - assert mid_a not in b_ids, "Agent A memory leaked into agent_b namespace" + assert mid_a in a_ids, "Agent A memory not found in agent_a collection" + assert mid_b not in a_ids, "Agent B memory leaked into agent_a collection" + assert mid_b in b_ids, "Agent B memory not found in agent_b collection" + assert mid_a not in b_ids, "Agent A memory leaked into agent_b collection" -def test_namespaced_ask_param(): +def test_collection_search_param(): """db.search(query, collections=[...]) should scope results correctly.""" emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) @@ -196,7 +196,7 @@ def test_namespaced_ask_param(): mid_b = db.remember("Agent B private", collection="agent_b") mid_s = db.remember("Shared knowledge", collection="shared") - # Single namespace via collections= param + # Single collection via collections= param hits = db.search("knowledge", collections=["shared"]) ids = {h.id for h in hits} assert mid_s in ids @@ -204,7 +204,7 @@ def test_namespaced_ask_param(): assert mid_b not in ids -def test_cross_namespace_fan_out(): +def test_cross_collection_fan_out(): """collections=[a, b] should return merged re-ranked results from both.""" emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) @@ -221,7 
+221,7 @@ def test_cross_namespace_fan_out(): assert mid_s in ids -def test_global_ask_returns_all_namespaces(): +def test_global_search_returns_all_collections(): """db.search(query) with no collections= should search globally.""" emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) @@ -237,16 +237,16 @@ def test_global_ask_returns_all_namespaces(): assert mid_s in ids -def test_readonly_namespace(): - """A readonly namespace should allow search() but reject remember().""" +def test_readonly_collection(): + """A readonly collection should allow search() but reject remember().""" emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) # Write to shared normally. - mid = db.namespace("shared").remember("Public knowledge") + mid = db.collection("shared").remember("Public knowledge") # Read from a readonly view. - ro = db.namespace("shared", readonly=True) + ro = db.collection("shared", readonly=True) hits = ro.search("Public knowledge") assert any(h.id == mid for h in hits) @@ -259,7 +259,6 @@ def test_readonly_namespace(): # Deterministic Replay import json -import tempfile from cortexadb import ReplayReader LOG_PATH = "/tmp/cortexadb_replay_test.log" @@ -329,8 +328,8 @@ def test_replay_connect_id_mapping(cleanup_replay): assert len(db2) == 2 -def test_replay_namespace_preserved(cleanup_replay): - """Replay should preserve original namespaces.""" +def test_replay_collection_preserved(cleanup_replay): + """Replay should preserve original collections.""" with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db: db.remember("In A", embedding=[1.0, 0.0, 0.0], collection="agent_a") db.remember("In B", embedding=[0.0, 1.0, 0.0], collection="agent_b") @@ -471,7 +470,7 @@ def test_hybrid_use_graph(): assert hits_graph[1].score >= hits_normal[0].score * 0.89 -def test_hybrid_use_graph_respects_namespaces(monkeypatch): +def test_hybrid_use_graph_respects_collections(monkeypatch): import cortexadb db = 
cortexadb.CortexaDB.open(DB_PATH, dimension=2, sync="strict") @@ -479,7 +478,7 @@ def test_hybrid_use_graph_respects_namespaces(monkeypatch): id_b = db.remember("Node B", embedding=[0.0, 1.0], collection="agent_b") def fake_get_neighbors(_mid): - # Simulate an unexpected backend neighbor response across namespaces. + # Simulate an unexpected backend neighbor response across collections. return [(id_b, "forced")] monkeypatch.setattr(type(db._inner), "get_neighbors", lambda self, mid: fake_get_neighbors(mid)) diff --git a/docs/api/python.md b/docs/api/python.md index 22a9fbb..feb199e 100644 --- a/docs/api/python.md +++ b/docs/api/python.md @@ -138,7 +138,7 @@ Retrieves a full memory entry by ID. mem = db.get_memory(42) print(mem.id) # 42 print(mem.content) # b"User prefers dark mode" -print(mem.namespace) # "default" +print(mem.collection) # "default" print(mem.metadata) # {"source": "onboarding"} print(mem.created_at) # 1709654400 print(mem.importance) # 0.0 @@ -186,7 +186,7 @@ db.connect(1, 2, "relates_to") db.connect(1, 3, "caused_by") ``` -> Both memories must be in the same namespace. Cross-namespace edges are forbidden. +> Both memories must be in the same collection. Cross-collection edges are forbidden. --- @@ -213,7 +213,7 @@ for edge in neighbors: ## Document Ingestion -### `.ingest(text, strategy="recursive", chunk_size=512, overlap=50, metadata=None, namespace=None)` +### `.ingest(text, strategy="recursive", chunk_size=512, overlap=50, metadata=None, collection=None)` Chunks text and stores each chunk as a memory. @@ -226,13 +226,13 @@ Chunks text and stores each chunk as a memory. 
| `chunk_size` | `int` | `512` | Target chunk size in characters | | `overlap` | `int` | `50` | Overlap between chunks | | `metadata` | `dict?` | `None` | Metadata to attach to all chunks | -| `namespace` | `str?` | `None` | Target namespace | +| `collection` | `str?` | `None` | Target collection | **Returns:** `list[int]` - Memory IDs of stored chunks --- -### `.load(file_path, strategy="markdown", chunk_size=512, overlap=50, metadata=None, namespace=None)` +### `.load(file_path, strategy="markdown", chunk_size=512, overlap=50, metadata=None, collection=None)` Loads a file, chunks it, and stores each chunk. @@ -245,7 +245,7 @@ Loads a file, chunks it, and stores each chunk. | `chunk_size` | `int` | `512` | Target chunk size | | `overlap` | `int` | `50` | Overlap between chunks | | `metadata` | `dict?` | `None` | Metadata for all chunks | -| `namespace` | `str?` | `None` | Target namespace | +| `collection` | `str?` | `None` | Target collection | **Supported formats:** `.txt`, `.md`, `.json`, `.docx` (requires `cortexadb[docs]`), `.pdf` (requires `cortexadb[pdf]`) @@ -257,34 +257,32 @@ db.load("paper.pdf", strategy="recursive", chunk_size=1024) --- -### `.ingest_document(text, chunk_size=512, overlap=50, metadata=None, namespace=None)` +### `.ingest_document(text, chunk_size=512, overlap=50, metadata=None, collection=None)` Legacy method for chunking and storing text. Uses fixed chunking. --- -## Namespace +### `.collection(name, readonly=False)` -### `.namespace(name, readonly=False)` - -Returns a scoped view of the database for a specific namespace. +Returns a scoped view of the database for a specific collection. 
**Parameters:** | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `name` | `str` | Required | Namespace name | +| `name` | `str` | Required | Collection name | | `readonly` | `bool` | `False` | If `True`, write operations raise errors | -**Returns:** `Namespace` +**Returns:** `Collection` **Example:** ```python -ns = db.namespace("agent_a") -mid = ns.remember("text") -hits = ns.ask("query") -ns.delete_memory(mid) -ns.ingest_document("long text") +col = db.collection("agent_a") +mid = col.add("text") +hits = col.search("query") +col.delete(mid) +col.ingest("long text") ``` --- @@ -384,7 +382,7 @@ Full memory entry from `.get_memory()`. | Field | Type | Description | |-------|------|-------------| | `id` | `int` | Memory ID | -| `namespace` | `str` | Namespace name | +| `collection` | `str` | Collection name | | `content` | `bytes` | Raw content | | `embedding` | `list[float]?` | Vector embedding | | `metadata` | `dict[str, str]` | Key-value metadata | diff --git a/docs/api/rust.md b/docs/api/rust.md index b5bc047..7247155 100644 --- a/docs/api/rust.md +++ b/docs/api/rust.md @@ -24,22 +24,22 @@ let db = CortexaDB::builder("/path/to/db", config).build()?; #### `remember(embedding, metadata) -> Result` -Stores a memory in the default namespace. +Stores a memory in the default collection. ```rust let id = db.remember(vec![0.1; 128], None)?; let id = db.remember(vec![0.1; 128], Some(metadata_map))?; ``` -#### `remember_in_namespace(ns, embedding, metadata) -> Result` +#### `remember_in_collection(collection, embedding, metadata) -> Result` -Stores a memory in a specific namespace. +Stores a memory in a specific collection. 
```rust -let id = db.remember_in_namespace("agent_a", vec![0.1; 128], None)?; +let id = db.remember_in_collection("agent_a", vec![0.1; 128], None)?; ``` -#### `remember_with_content(ns, content, embedding, metadata) -> Result` +#### `remember_with_content(collection, content, embedding, metadata) -> Result` Stores a memory with raw content bytes. @@ -54,7 +54,7 @@ let id = db.remember_with_content( #### `ask(embedding, top_k, metadata_filter) -> Result>` -Vector similarity search in the default namespace. +Vector similarity search in the default collection. ```rust let hits = db.ask(vec![0.1; 128], 5, None)?; @@ -63,12 +63,12 @@ for hit in &hits { } ``` -#### `ask_in_namespace(ns, embedding, top_k, filter) -> Result>` +#### `ask_in_collection(collection, embedding, top_k, filter) -> Result>` -Namespace-scoped search. +Collection-scoped search. ```rust -let hits = db.ask_in_namespace("agent_a", vec![0.1; 128], 5, None)?; +let hits = db.ask_in_collection("agent_a", vec![0.1; 128], 5, None)?; ``` #### `get_memory(id) -> Result` @@ -156,7 +156,7 @@ pub struct Hit { pub struct Memory { pub id: u64, pub content: Vec, - pub namespace: String, + pub collection: String, pub embedding: Option>, pub metadata: HashMap, pub created_at: u64, @@ -171,7 +171,7 @@ Internal representation used by the storage engine. ```rust pub struct MemoryEntry { pub id: MemoryId, - pub namespace: String, + pub collection: String, pub content: Vec, pub embedding: Option>, pub metadata: HashMap, diff --git a/docs/content/docs/api/rust.mdx b/docs/content/docs/api/rust.mdx index 64fb63b..2ab163a 100644 --- a/docs/content/docs/api/rust.mdx +++ b/docs/content/docs/api/rust.mdx @@ -29,22 +29,22 @@ let db = CortexaDBBuilder::new("/path/to/db", 128) #### `remember(embedding, metadata) -> Result` -Stores a memory in the default namespace. +Stores a memory in the default collection. 
```rust let id = db.remember(vec![0.1; 128], None)?; let id = db.remember(vec![0.1; 128], Some(metadata_map))?; ``` -#### `remember_in_namespace(ns, embedding, metadata) -> Result` +#### `remember_in_collection(collection, embedding, metadata) -> Result` -Stores a memory in a specific namespace. +Stores a memory in a specific collection. ```rust -let id = db.remember_in_namespace("agent_a", vec![0.1; 128], None)?; +let id = db.remember_in_collection("agent_a", vec![0.1; 128], None)?; ``` -#### `remember_with_content(ns, content, embedding, metadata) -> Result` +#### `remember_with_content(collection, content, embedding, metadata) -> Result` Stores a memory with raw content bytes. @@ -59,7 +59,7 @@ let id = db.remember_with_content( #### `ask(embedding, top_k, metadata_filter) -> Result>` -Vector similarity search in the default namespace. +Vector similarity search in the default collection. ```rust let hits = db.ask(vec![0.1; 128], 5, None)?; @@ -68,12 +68,12 @@ for hit in &hits { } ``` -#### `ask_in_namespace(ns, embedding, top_k, filter) -> Result>` +#### `ask_in_collection(collection, embedding, top_k, filter) -> Result>` -Namespace-scoped search. +Collection-scoped search. ```rust -let hits = db.ask_in_namespace("agent_a", vec![0.1; 128], 5, None)?; +let hits = db.ask_in_collection("agent_a", vec![0.1; 128], 5, None)?; ``` #### `get_memory(id) -> Result` @@ -180,7 +180,7 @@ pub struct Hit { pub struct Memory { pub id: u64, pub content: Vec, - pub namespace: String, + pub collection: String, pub embedding: Option>, pub metadata: HashMap, pub created_at: u64, @@ -195,7 +195,7 @@ Internal representation used by the storage engine. 
```rust pub struct MemoryEntry { pub id: MemoryId, - pub namespace: String, + pub collection: String, pub content: Vec, pub embedding: Option>, pub metadata: HashMap, diff --git a/docs/content/docs/guides/replay.mdx b/docs/content/docs/guides/replay.mdx index 8060a1c..26db678 100644 --- a/docs/content/docs/guides/replay.mdx +++ b/docs/content/docs/guides/replay.mdx @@ -43,8 +43,8 @@ The log file is NDJSON with a header line followed by operation lines: **Operations (lines 2+):** ```json -{"op": "remember", "id": 1, "text": "User likes dark mode", "embedding": [...], "namespace": "default", "metadata": null} -{"op": "remember", "id": 2, "text": "User works at Stripe", "embedding": [...], "namespace": "default", "metadata": null} +{"op": "remember", "id": 1, "text": "User likes dark mode", "embedding": [...], "collection": "default", "metadata": null} +{"op": "remember", "id": 2, "text": "User works at Stripe", "embedding": [...], "collection": "default", "metadata": null} {"op": "connect", "from_id": 1, "to_id": 2, "relation": "relates_to"} {"op": "delete", "id": 1} {"op": "compact"} diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index f9e43aa..728efc3 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -68,7 +68,7 @@ db.ingest("Long article text here...", strategy="markdown") ### 7. 
Use Namespaces ```python -agent_a = db.namespace("agent_a") +agent_a = db.collection("agent_a") agent_a.remember("Agent A's private memory") hits = agent_a.ask("query only agent A's memories") ``` diff --git a/docs/guides/chunking.md b/docs/guides/chunking.md index 5a98e29..7e5e531 100644 --- a/docs/guides/chunking.md +++ b/docs/guides/chunking.md @@ -127,8 +127,8 @@ for c in chunks: # Chunk and store text (uses embedder for auto-embedding) ids = db.ingest("Long article text...", strategy="recursive", chunk_size=512) -# Chunk and store with namespace -ids = db.ingest("text", strategy="markdown", namespace="docs") +# Chunk and store with collection +ids = db.ingest("text", strategy="markdown", collection="docs") ``` ### Load from File diff --git a/docs/guides/collections.md b/docs/guides/collections.md new file mode 100644 index 0000000..457729e --- /dev/null +++ b/docs/guides/collections.md @@ -0,0 +1,157 @@ +# Collections + +Collections allow you to isolate memories between different agents, workspaces, or contexts within a single CortexaDB database file. + +## Overview + +Every memory in CortexaDB belongs to a collection. The default collection is `"default"`. 
Collections provide: + +- **Isolation** - Queries only return results from the target collection +- **Organization** - Group memories by agent, user, or topic +- **Access Control** - Read-only collections for shared knowledge + +--- + +## Basic Usage + +### Accessing a Collection + +```python +# Get a collection handle +agent_a = db.collection("agent_a") +agent_b = db.collection("agent_b") +``` + +### Writing to a Collection + +```python +agent_a.add("Agent A's private memory") +agent_b.add("Agent B's private memory") +``` + +### Querying a Collection + +```python +# Only searches within agent_a's memories +hits = agent_a.search("What do I know?") + +# Only searches within agent_b's memories +hits = agent_b.search("What do I know?") +``` + +### Deleting from a Collection + +```python +agent_a.delete(memory_id) +``` + +### Ingesting Documents + +```python +agent_a.ingest("Long document...", chunk_size=512) +``` + +--- + +## Default Collection + +When you use the top-level `db.add()` and `db.search()`, memories are stored in and queried from the `"default"` collection. + +```python +# These are equivalent: +db.add("text") +db.collection("default").add("text") +``` + +--- + +## Read-only Collections + +You can create read-only collection handles for shared knowledge that shouldn't be modified: + +```python +shared = db.collection("shared_knowledge", readonly=True) + +# Reading works fine +hits = shared.search("query") + +# Writing raises CortexaDBError +shared.add("text") # Error! +``` + +This is useful for multi-agent systems where some agents should only read from a shared knowledge base. + +--- + +## Graph Edge Rules + +Graph edges are collection-scoped. 
You **cannot** create edges between memories in different collections: + +```python +agent_a = db.collection("agent_a") +agent_b = db.collection("agent_b") + +mid1 = agent_a.add("Memory in A") +mid2 = agent_b.add("Memory in B") + +# This will raise an error - cross-collection edges are forbidden +db.connect(mid1, mid2, "relates_to") +``` + +Graph traversal during queries also respects collection boundaries — BFS will not cross into other collections. + +--- + +## Common Patterns + +### Multi-Agent System + +```python +db = CortexaDB.open("agents.mem", dimension=128) + +# Each agent has its own collection +planner = db.collection("planner") +researcher = db.collection("researcher") +writer = db.collection("writer") + +# Agents store memories independently +planner.add("Task: Write a blog post about AI") +researcher.add("Found 3 relevant papers on AI agents") +writer.add("Draft: AI agents are transforming...") + +# Each agent queries only its own memories +planner_context = planner.search("What tasks are pending?") +``` + +### Shared Knowledge Base + +```python +# Admin writes to shared collection +shared = db.collection("shared") +shared.add("Company policy: All code must be reviewed") + +# Agents read from shared collection (read-only) +agent = db.collection("shared", readonly=True) +hits = agent.search("What are the code review rules?") +``` + +### Per-User Memory + +```python +def get_user_memory(db, user_id): + return db.collection(f"user_{user_id}") + +alice = get_user_memory(db, "alice") +alice.add("Alice prefers dark mode") + +bob = get_user_memory(db, "bob") +bob.add("Bob prefers light mode") +``` + +--- + +## Next Steps + +- [Core Concepts](./core-concepts.md) - How collections fit into the architecture +- [Query Engine](./query-engine.md) - How collection scoping affects queries +- [Python API](../api/python.md) - Collection API reference diff --git a/docs/guides/core-concepts.md b/docs/guides/core-concepts.md index b04991c..8423ce7 100644 --- 
a/docs/guides/core-concepts.md +++ b/docs/guides/core-concepts.md @@ -77,7 +77,7 @@ The `CortexaDBStore` coordinates concurrent access: The in-memory state machine holds the current database state: - All memory entries indexed by ID -- Graph edges (directed, per-namespace) +- Graph edges (directed, per-collection) - Temporal index (BTreeMap of timestamp to memory IDs) - Next ID counter @@ -112,7 +112,7 @@ A memory is the fundamental unit of storage: | Field | Type | Description | |-------|------|-------------| | `id` | `u64` | Auto-incrementing unique identifier | -| `namespace` | `String` | Isolation scope (default: `"default"`) | +| `collection` | `String` | Isolation scope (default: `"default"`) | | `content` | `bytes` | Raw content (typically UTF-8 text) | | `embedding` | `Vec?` | Optional vector embedding | | `metadata` | `Dict[str, str]` | Key-value metadata pairs | @@ -127,7 +127,7 @@ Edges are directed relationships between memories: Memory A --[relates_to]--> Memory B ``` -- Edges are namespaced — you cannot create edges across namespaces +- Edges are collection-scoped — you cannot create edges across collections - Each memory can have multiple outgoing edges - Used by the query engine for graph expansion during hybrid search diff --git a/docs/guides/namespaces.md b/docs/guides/namespaces.md deleted file mode 100644 index a407d3b..0000000 --- a/docs/guides/namespaces.md +++ /dev/null @@ -1,157 +0,0 @@ -# Namespaces - -Namespaces allow you to isolate memories between different agents, workspaces, or contexts within a single CortexaDB database file. - -## Overview - -Every memory in CortexaDB belongs to a namespace. The default namespace is `"default"`. 
Namespaces provide: - -- **Isolation** - Queries only return results from the target namespace -- **Organization** - Group memories by agent, user, or topic -- **Access Control** - Readonly namespaces for shared knowledge - ---- - -## Basic Usage - -### Creating a Namespace - -```python -# Get a namespace handle -agent_a = db.namespace("agent_a") -agent_b = db.namespace("agent_b") -``` - -### Writing to a Namespace - -```python -agent_a.remember("Agent A's private memory") -agent_b.remember("Agent B's private memory") -``` - -### Querying a Namespace - -```python -# Only searches within agent_a's memories -hits = agent_a.ask("What do I know?") - -# Only searches within agent_b's memories -hits = agent_b.ask("What do I know?") -``` - -### Deleting from a Namespace - -```python -agent_a.delete_memory(memory_id) -``` - -### Ingesting Documents - -```python -agent_a.ingest_document("Long document...", chunk_size=512) -``` - ---- - -## Default Namespace - -When you use the top-level `db.remember()` and `db.ask()`, memories are stored in and queried from the `"default"` namespace. - -```python -# These are equivalent: -db.remember("text") -db.namespace("default").remember("text") -``` - ---- - -## Readonly Namespaces - -You can create readonly namespace handles for shared knowledge that shouldn't be modified: - -```python -shared = db.namespace("shared_knowledge", readonly=True) - -# Reading works fine -hits = shared.ask("query") - -# Writing raises CortexaDBError -shared.remember("text") # Error! -``` - -This is useful for multi-agent systems where some agents should only read from a shared knowledge base. - ---- - -## Graph Edge Rules - -Graph edges are namespace-scoped. 
You **cannot** create edges between memories in different namespaces: - -```python -agent_a = db.namespace("agent_a") -agent_b = db.namespace("agent_b") - -mid1 = agent_a.remember("Memory in A") -mid2 = agent_b.remember("Memory in B") - -# This will raise an error - cross-namespace edges are forbidden -db.connect(mid1, mid2, "relates_to") -``` - -Graph traversal during queries also respects namespace boundaries — BFS will not cross into other namespaces. - ---- - -## Common Patterns - -### Multi-Agent System - -```python -db = CortexaDB.open("agents.mem", embedder=embedder) - -# Each agent has its own namespace -planner = db.namespace("planner") -researcher = db.namespace("researcher") -writer = db.namespace("writer") - -# Agents store memories independently -planner.remember("Task: Write a blog post about AI") -researcher.remember("Found 3 relevant papers on AI agents") -writer.remember("Draft: AI agents are transforming...") - -# Each agent queries only its own memories -planner_context = planner.ask("What tasks are pending?") -``` - -### Shared Knowledge Base - -```python -# Admin writes to shared namespace -shared = db.namespace("shared") -shared.remember("Company policy: All code must be reviewed") - -# Agents read from shared namespace (readonly) -agent = db.namespace("shared", readonly=True) -hits = agent.ask("What are the code review rules?") -``` - -### Per-User Memory - -```python -def get_user_memory(db, user_id): - return db.namespace(f"user_{user_id}") - -alice = get_user_memory(db, "alice") -alice.remember("Alice prefers dark mode") - -bob = get_user_memory(db, "bob") -bob.remember("Bob prefers light mode") -``` - ---- - -## Next Steps - -- [Core Concepts](./core-concepts.md) - How namespaces fit into the architecture -- [Query Engine](./query-engine.md) - How namespace scoping affects queries -- [Python API](../api/python.md) - Namespace API reference diff --git a/docs/guides/query-engine.md b/docs/guides/query-engine.md index e6cbdd5..e601c4b 100644 
--- a/docs/guides/query-engine.md +++ b/docs/guides/query-engine.md @@ -58,7 +58,7 @@ The query planner selects the optimal execution path based on the options provid ### Exact Mode (Default) -Brute-force cosine similarity scan over all embeddings in the target namespace. +Brute-force cosine similarity scan over all embeddings in the target collection. - **Complexity**: O(n) - **Recall**: 100% @@ -82,7 +82,7 @@ To improve result quality with filtering, the vector search fetches more candida candidate_k = top_k * candidate_multiplier ``` -The default multiplier is 4 for namespace-scoped queries (to account for filtering overhead). +The default multiplier is 4 for collection-scoped queries (to account for filtering overhead). --- @@ -100,7 +100,7 @@ When `use_graph=True`, the query engine expands results using BFS traversal: hits = db.ask("query", use_graph=True) ``` -Graph expansion only follows edges within the same namespace. +Graph expansion only follows edges within the same collection. ### Hop Depth @@ -156,16 +156,16 @@ Metadata filtering is applied after vector search but before final scoring. --- -## Namespace Scoping +## Collection Scoping -Queries can be scoped to a specific namespace: +Queries can be scoped to a specific collection: ```python -ns = db.namespace("agent_a") -hits = ns.ask("query") # Only searches agent_a's memories +col = db.collection("agent_a") +hits = col.query("query").execute() ``` -When querying within a namespace, the engine over-fetches candidates globally (4x `top_k`), filters to the target namespace, then returns the final `top_k`. +When querying within a collection, the engine over-fetches candidates globally (4x `top_k`), filters to the target collection, then returns the final `top_k`. 
--- @@ -205,5 +205,5 @@ hits = db.ask("query", top_k=10, use_graph=True, recency_bias=True) ## Next Steps - [Indexing](./indexing.md) - Configure exact vs HNSW search -- [Namespaces](./namespaces.md) - Multi-agent memory isolation +- [Collections](./collections.md) - Multi-agent memory isolation - [Python API](../api/python.md) - Full API reference diff --git a/docs/guides/replay.md b/docs/guides/replay.md index f64e1bf..1a4ce54 100644 --- a/docs/guides/replay.md +++ b/docs/guides/replay.md @@ -40,8 +40,8 @@ The log file is NDJSON with a header line followed by operation lines: **Operations (lines 2+):** ```json -{"op": "remember", "id": 1, "text": "User likes dark mode", "embedding": [...], "namespace": "default", "metadata": null} -{"op": "remember", "id": 2, "text": "User works at Stripe", "embedding": [...], "namespace": "default", "metadata": null} +{"op": "remember", "id": 1, "text": "User likes dark mode", "embedding": [...], "collection": "default", "metadata": null} +{"op": "remember", "id": 2, "text": "User works at Stripe", "embedding": [...], "collection": "default", "metadata": null} {"op": "connect", "from_id": 1, "to_id": 2, "relation": "relates_to"} {"op": "delete", "id": 1} {"op": "compact"} diff --git a/docs/index.md b/docs/index.md index 86102de..a813e06 100644 --- a/docs/index.md +++ b/docs/index.md @@ -18,7 +18,7 @@ Think of it as **SQLite, but with semantic and relational intelligence for your - [Query Engine](./guides/query-engine.md) - Hybrid search with vector, graph, and temporal scoring - [Indexing](./guides/indexing.md) - Exact search vs HNSW approximate nearest neighbor - [Chunking](./guides/chunking.md) - Document ingestion and chunking strategies -- [Namespaces](./guides/namespaces.md) - Multi-agent memory isolation +- [Collections](./guides/collections.md) - Multi-agent memory isolation - [Embedders](./guides/embedders.md) - Embedding providers (OpenAI, Gemini, Ollama, Hash) - [Replay & Recording](./guides/replay.md) - Deterministic 
session recording and replay - [Configuration](./guides/configuration.md) - All configuration options explained diff --git a/docs/resources/examples.md b/docs/resources/examples.md index 49fd4db..ccaac8c 100644 --- a/docs/resources/examples.md +++ b/docs/resources/examples.md @@ -50,10 +50,10 @@ hits = db.ask("Who works at Acme?", use_graph=True) ```python db = CortexaDB.open("agents.mem", embedder=embedder) -# Each agent has isolated memory -planner = db.namespace("planner") -researcher = db.namespace("researcher") -writer = db.namespace("writer") +# Each agent has its own collection +planner = db.collection("planner") +researcher = db.collection("researcher") +writer = db.collection("writer") # Agents store memories independently planner.remember("Task: Write a blog post about vector databases") @@ -64,11 +64,12 @@ writer.remember("Draft intro: Vector databases are transforming AI...") # Each agent queries only its own memories research = researcher.ask("What did I find about indexing?") -# Shared knowledge base (readonly for agents) -shared = db.namespace("shared") -shared.remember("Company style guide: Use active voice") +# Admin writes to shared collection +shared = db.collection("shared") +shared.add("Company policy: All code must be reviewed") -agent_view = db.namespace("shared", readonly=True) +# Agents read from shared collection (read-only) +agent_view = db.collection("shared", readonly=True) guidelines = agent_view.ask("What is the writing style?") ``` diff --git a/examples/rust/basic_usage.rs b/examples/rust/basic_usage.rs index 84d299b..75521b7 100644 --- a/examples/rust/basic_usage.rs +++ b/examples/rust/basic_usage.rs @@ -6,7 +6,7 @@ //! - Using chunking strategies //! - Hybrid search //! - Graph relationships -//! - Namespace-scoped operations +//! - Collection-scoped operations use cortexadb_core::{chunk, ChunkingStrategy, CortexaDB}; use std::collections::hash_map::DefaultHasher; @@ -128,19 +128,19 @@ Content under heading 3. 
let records = vec![ BatchRecord { - namespace: "default".to_string(), + collection: "default".to_string(), content: text1.as_bytes().to_vec(), embedding: Some(embed_text(text1, dimension)), metadata: None, }, BatchRecord { - namespace: "default".to_string(), + collection: "default".to_string(), content: text2.as_bytes().to_vec(), embedding: Some(embed_text(text2, dimension)), metadata: None, }, BatchRecord { - namespace: "default".to_string(), + collection: "default".to_string(), content: text3.as_bytes().to_vec(), embedding: Some(embed_text(text3, dimension)), metadata: None, @@ -177,24 +177,24 @@ Content under heading 3. } // ----------------------------------------------------------- - // 8. Namespace-scoped retrieval + // 8. Collection-scoped retrieval // ----------------------------------------------------------- - println!("\n[7] Namespaces..."); + println!("\n[7] Collections..."); let travel_text = "Flight to Tokyo booked for June."; - let ns_id = db.remember_with_content( + let col_id = db.remember_with_content( "travel_agent", travel_text.as_bytes().to_vec(), embed_text(travel_text, dimension), None, )?; - println!(" Stored in namespace 'travel_agent': ID {}", ns_id); - let ns_hits = db.ask_in_namespace( + println!(" Stored in collection 'travel_agent': ID {}", col_id); + let col_hits = db.ask_in_collection( "travel_agent", embed_text("Tokyo travel plans", dimension), 5, None, )?; - println!(" travel_agent hits: {}", ns_hits.len()); + println!(" travel_agent hits: {}", col_hits.len()); // ----------------------------------------------------------- // 9. 
Stats From b88a7836929f68b2d3a1b9b2872096a316f0b95e Mon Sep 17 00:00:00 2001 From: anaslimem Date: Mon, 9 Mar 2026 04:38:41 +0100 Subject: [PATCH 2/6] Migrated from remember to add --- .../cortexadb-core/benches/storage_bench.rs | 2 +- .../cortexadb-core/src/bin/startup_bench.rs | 4 +- crates/cortexadb-core/src/facade.rs | 78 +++++++-------- crates/cortexadb-core/tests/integration.rs | 64 ++++++------ crates/cortexadb-py/cortexadb/client.py | 12 +-- crates/cortexadb-py/cortexadb/embedder.py | 4 +- .../cortexadb/providers/gemini.py | 2 +- .../cortexadb/providers/ollama.py | 2 +- .../cortexadb/providers/openai.py | 2 +- crates/cortexadb-py/cortexadb/replay.py | 14 +-- crates/cortexadb-py/src/lib.rs | 12 +-- crates/cortexadb-py/test_smoke.py | 98 +++++++++---------- crates/cortexadb-py/test_stress.py | 6 +- docs/api/python.md | 8 +- docs/api/rust.md | 14 +-- docs/content/docs/api/rust.mdx | 14 +-- .../docs/getting-started/quickstart.mdx | 4 +- docs/content/docs/guides/replay.mdx | 4 +- docs/content/docs/resources/examples.mdx | 4 +- docs/getting-started/quickstart.md | 12 +-- docs/guides/core-concepts.md | 4 +- docs/guides/embedders.md | 8 +- docs/guides/replay.md | 12 +-- docs/index.md | 4 +- docs/resources/examples.md | 36 +++---- examples/rust/basic_usage.rs | 6 +- 26 files changed, 214 insertions(+), 216 deletions(-) diff --git a/crates/cortexadb-core/benches/storage_bench.rs b/crates/cortexadb-core/benches/storage_bench.rs index 2c32533..ee441da 100644 --- a/crates/cortexadb-core/benches/storage_bench.rs +++ b/crates/cortexadb-core/benches/storage_bench.rs @@ -22,7 +22,7 @@ fn bench_ingestion(c: &mut Criterion) { c.bench_function("ingest_single_memory", |b| { b.iter(|| { - db.remember(embedding.clone(), None).unwrap(); + db.add(embedding.clone(), None).unwrap(); }) }); } diff --git a/crates/cortexadb-core/src/bin/startup_bench.rs b/crates/cortexadb-core/src/bin/startup_bench.rs index 8b19fce..014b5c8 100644 --- a/crates/cortexadb-core/src/bin/startup_bench.rs +++ 
b/crates/cortexadb-core/src/bin/startup_bench.rs @@ -45,7 +45,7 @@ fn main() -> Result<(), Box> { for i in 0..entry_count { let embedding: Vec = (0..vector_dim).map(|d| ((i * 7 + d * 13) % 100) as f32 / 100.0).collect(); - db.remember(embedding, None)?; + db.add(embedding, None)?; } } let seed_elapsed = seed_start.elapsed(); @@ -105,7 +105,7 @@ fn main() -> Result<(), Box> { for i in 0..tail_count { let embedding: Vec = (0..vector_dim).map(|d| ((i * 11 + d * 3) % 100) as f32 / 100.0).collect(); - db.remember(embedding, None)?; + db.add(embedding, None)?; } } diff --git a/crates/cortexadb-core/src/facade.rs b/crates/cortexadb-core/src/facade.rs index 5700eec..2477fb6 100644 --- a/crates/cortexadb-core/src/facade.rs +++ b/crates/cortexadb-core/src/facade.rs @@ -2,7 +2,7 @@ //! //! This is the recommended entry point for using CortexaDB as a library. //! It wraps [`CortexaDBStore`] and hides planner/engine/index details behind -//! five core operations: `open`, `remember`, `ask`, `connect`, `compact`. +//! five core operations: `open`, `add`, `ask`, `connect`, `compact`. use std::collections::HashMap; use std::path::PathBuf; @@ -174,7 +174,7 @@ impl QueryEmbedder for StaticEmbedder { /// .with_sync_policy(cortexadb_core::engine::SyncPolicy::Async { interval_ms: 1000 }) /// .build()?; /// -/// let id = db.remember(vec![1.0, 0.0, 0.0], None)?; +/// let id = db.add(vec![1.0, 0.0, 0.0], None)?; /// let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None)?; /// # Ok(()) /// # } @@ -265,7 +265,7 @@ impl CortexaDB { /// # let db = CortexaDB::open("test", 3)?; /// let mut meta = HashMap::new(); /// meta.insert("type".to_string(), "thought".to_string()); - /// let id = db.remember(vec![0.1, 0.2, 0.3], Some(meta))?; + /// let id = db.add(vec![0.1, 0.2, 0.3], Some(meta))?; /// # Ok(()) /// # } /// ``` @@ -273,16 +273,16 @@ impl CortexaDB { /// # Errors /// /// Returns [`CortexaDBError`] if the write-ahead log fails to append the entry. 
- pub fn remember( + pub fn add( &self, embedding: Vec, metadata: Option>, ) -> Result { - self.remember_in_collection("default", embedding, metadata) + self.add_in_collection("default", embedding, metadata) } /// Store a new memory in a specific collection. - pub fn remember_in_collection( + pub fn add_in_collection( &self, collection: &str, embedding: Vec, @@ -302,7 +302,7 @@ impl CortexaDB { } /// Store a memory with explicit content bytes optionally in a collection. - pub fn remember_with_content( + pub fn add_with_content( &self, collection: &str, content: Vec, @@ -323,7 +323,7 @@ impl CortexaDB { } /// Store a batch of memories efficiently. - pub fn remember_batch(&self, records: Vec) -> Result> { + pub fn add_batch(&self, records: Vec) -> Result> { let ts = SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or_default().as_secs(); let mut entries = Vec::with_capacity(records.len()); let mut ids = Vec::with_capacity(records.len()); @@ -516,13 +516,13 @@ mod tests { use tempfile::TempDir; #[test] - fn test_open_remember_ask() { + fn test_open_add_ask() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id1 = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - let id2 = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + let id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); assert_ne!(id1, id2); let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); @@ -536,8 +536,8 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id1 = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - let id2 = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + let id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.connect(id1, id2, "related").unwrap(); let stats = db.stats(); @@ -553,8 +553,8 @@ mod tests { { let 
db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); } // Reopen — should recover from WAL. @@ -582,12 +582,12 @@ mod tests { { let db = CortexaDB::open_with_config(path.to_str().unwrap(), config.clone()).unwrap(); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.flush().unwrap(); // ensure WAL is synced before checkpoint truncates it db.checkpoint().unwrap(); // Write more after checkpoint. - db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + db.add(vec![0.0, 0.0, 1.0], None).unwrap(); } let db = CortexaDB::open_with_config(path.to_str().unwrap(), config).unwrap(); @@ -601,19 +601,19 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); db.compact().unwrap(); } #[test] - fn test_remember_with_metadata() { + fn test_add_with_metadata() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); let mut meta = HashMap::new(); meta.insert("source".to_string(), "test".to_string()); - let id = db.remember(vec![1.0, 0.0, 0.0], Some(meta)).unwrap(); + let id = db.add(vec![1.0, 0.0, 0.0], Some(meta)).unwrap(); let hits = db.ask(vec![1.0, 0.0, 0.0], 1, None).unwrap(); assert_eq!(hits[0].id, id); @@ -628,8 +628,8 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id1 = db.remember_in_collection("agent_b", vec![0.0, 1.0, 0.0], None).unwrap(); - let _id2 = db.remember_in_collection("agent_c", vec![0.0, 0.0, 1.0], None).unwrap(); + 
let id1 = db.add_in_collection("agent_b", vec![0.0, 1.0, 0.0], None).unwrap(); + let _id2 = db.add_in_collection("agent_c", vec![0.0, 0.0, 1.0], None).unwrap(); let stats = db.stats(); assert_eq!(stats.entries, 2); @@ -644,7 +644,7 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); assert_eq!(db.stats().entries, 1); db.delete_memory(id).unwrap(); @@ -657,10 +657,10 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); // Keep a second entry so the index is non-empty after deletion. // (ask() returns NoEmbeddings when the vector index is completely empty.) - let _id_keep = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + let _id_keep = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.delete_memory(id).unwrap(); @@ -677,7 +677,7 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); db.delete_memory(id).unwrap(); let result = db.get_memory(id); @@ -690,9 +690,9 @@ mod tests { let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id1 = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - let id2 = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); - let id3 = db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + let id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); + let id3 = db.add(vec![0.0, 0.0, 1.0], None).unwrap(); db.connect(id1, id2, "related").unwrap(); db.connect(id1, id3, "follows").unwrap(); @@ -715,7 +715,7 @@ mod tests { let path = 
temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - let id = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); let neighbors = db.get_neighbors(id).unwrap(); assert!(neighbors.is_empty(), "node with no edges should return empty neighbors"); } @@ -727,8 +727,8 @@ mod tests { let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); // Same embedding direction — only collection should differentiate results. - let id_a = db.remember_in_collection("ns_a", vec![1.0, 0.0, 0.0], None).unwrap(); - let _id_b = db.remember_in_collection("ns_b", vec![1.0, 0.0, 0.0], None).unwrap(); + let id_a = db.add_in_collection("ns_a", vec![1.0, 0.0, 0.0], None).unwrap(); + let _id_b = db.add_in_collection("ns_b", vec![1.0, 0.0, 0.0], None).unwrap(); let hits = db.ask_in_collection("ns_a", vec![1.0, 0.0, 0.0], 10, None).unwrap(); assert!(!hits.is_empty(), "should find memories in ns_a"); @@ -744,7 +744,7 @@ mod tests { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); db.flush().expect("flush must not fail"); } @@ -753,7 +753,7 @@ mod tests { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); db.compact().expect("compact must not fail"); } @@ -770,11 +770,11 @@ mod tests { // Insert 10 entries in ns_majority to fill the global index. for i in 0..10u32 { let v = vec![i as f32 / 10.0, 1.0 - i as f32 / 10.0, 0.0]; - db.remember_in_collection("ns_majority", v, None).unwrap(); + db.add_in_collection("ns_majority", v, None).unwrap(); } // Insert 2 entries in ns_sparse. 
- let id_a = db.remember_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], None).unwrap(); - let id_b = db.remember_in_collection("ns_sparse", vec![0.9, 0.1, 0.0], None).unwrap(); + let id_a = db.add_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], None).unwrap(); + let id_b = db.add_in_collection("ns_sparse", vec![0.9, 0.1, 0.0], None).unwrap(); // Ask for top-2 in ns_sparse — both must be returned. let hits = db.ask_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], 2, None).unwrap(); @@ -798,7 +798,7 @@ mod tests { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); // Default QueryOptions has intent_anchors = None; must produce same results as ask(). let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); assert!(!hits.is_empty()); diff --git a/crates/cortexadb-core/tests/integration.rs b/crates/cortexadb-core/tests/integration.rs index 9eef42d..5f2bc98 100644 --- a/crates/cortexadb-core/tests/integration.rs +++ b/crates/cortexadb-core/tests/integration.rs @@ -1,6 +1,6 @@ //! Integration tests for CortexaDB. //! -//! These tests exercise the full stack: open → remember → ask → checkpoint → recover. +//! These tests exercise the full stack: open → add → ask → checkpoint → recover. //! Unlike the unit tests in `src/`, these tests run against actual disk files (via tempdir). 
use cortexadb_core::{CortexaDB, CortexaDBConfig}; @@ -24,17 +24,17 @@ fn open_db_with_config(dir: &TempDir, config: CortexaDBConfig) -> CortexaDB { } // --------------------------------------------------------------------------- -// Basic open → remember → ask → recover +// Basic open → add → ask → recover // --------------------------------------------------------------------------- #[test] -fn test_full_open_remember_ask() { +fn test_full_open_add_ask() { let dir = TempDir::new().unwrap(); let path = dir.path().join("db"); let db = open_db(&path); - let id1 = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - let id2 = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + let id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); assert!(!hits.is_empty(), "ask should return results"); @@ -52,9 +52,9 @@ fn test_recover_after_drop_restores_entries() { let expected_ids: Vec; { let db = open_db(&path); - let id1 = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - let id2 = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); - let id3 = db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + let id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); + let id3 = db.add(vec![0.0, 0.0, 1.0], None).unwrap(); expected_ids = vec![id1, id2, id3]; // db dropped here (simulates process exit without explicit flush) } @@ -77,9 +77,9 @@ fn test_recover_search_returns_correct_top_hit() { let id_target: u64; { let db = open_db(&path); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); - id_target = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); + id_target = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![0.0, 0.0, 1.0], None).unwrap(); } let db = open_db(&path); @@ -101,12 +101,12 @@ fn 
test_checkpoint_recovery_preserves_all_entries() { let mut all_ids: Vec = Vec::new(); { let db = open_db(&path); - all_ids.push(db.remember(vec![1.0, 0.0, 0.0], None).unwrap()); - all_ids.push(db.remember(vec![0.0, 1.0, 0.0], None).unwrap()); + all_ids.push(db.add(vec![1.0, 0.0, 0.0], None).unwrap()); + all_ids.push(db.add(vec![0.0, 1.0, 0.0], None).unwrap()); db.flush().unwrap(); // ensure WAL is synced before checkpoint db.checkpoint().unwrap(); // Write one more entry AFTER the checkpoint. - all_ids.push(db.remember(vec![0.0, 0.0, 1.0], None).unwrap()); + all_ids.push(db.add(vec![0.0, 0.0, 1.0], None).unwrap()); } let db = open_db(&path); @@ -125,13 +125,13 @@ fn test_double_checkpoint_recovery() { { let db = open_db(&path); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); db.flush().unwrap(); db.checkpoint().unwrap(); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.flush().unwrap(); db.checkpoint().unwrap(); // second checkpoint - db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + db.add(vec![0.0, 0.0, 1.0], None).unwrap(); } let db = open_db(&path); @@ -151,8 +151,8 @@ fn test_delete_persists_across_recovery() { let kept_id: u64; { let db = open_db(&path); - deleted_id = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - kept_id = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + deleted_id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + kept_id = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.delete_memory(deleted_id).unwrap(); assert_eq!(db.stats().entries, 1); } @@ -172,8 +172,8 @@ fn test_delete_then_checkpoint_recovery() { let deleted_id: u64; { let db = open_db(&path); - deleted_id = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + deleted_id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.delete_memory(deleted_id).unwrap(); db.flush().unwrap(); // ensure 
WAL is synced before checkpoint db.checkpoint().unwrap(); @@ -196,8 +196,8 @@ fn test_graph_edges_persist_across_recovery() { let (id1, id2): (u64, u64); { let db = open_db(&path); - id1 = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - id2 = db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); + id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.connect(id1, id2, "relates_to").unwrap(); } @@ -221,8 +221,8 @@ fn test_collection_isolation_persists() { let id_b: u64; { let db = open_db(&path); - id_a = db.remember_in_collection("agent_a", vec![1.0, 0.0, 0.0], None).unwrap(); - id_b = db.remember_in_collection("agent_b", vec![1.0, 0.0, 0.0], None).unwrap(); + id_a = db.add_in_collection("agent_a", vec![1.0, 0.0, 0.0], None).unwrap(); + id_b = db.add_in_collection("agent_b", vec![1.0, 0.0, 0.0], None).unwrap(); } let db = open_db(&path); @@ -245,7 +245,7 @@ fn test_metadata_persists_across_recovery() { let mut meta = std::collections::HashMap::new(); meta.insert("source".to_string(), "unit_test".to_string()); meta.insert("priority".to_string(), "high".to_string()); - id = db.remember(vec![1.0, 0.0, 0.0], Some(meta)).unwrap(); + id = db.add(vec![1.0, 0.0, 0.0], Some(meta)).unwrap(); } let db = open_db(&path); @@ -274,9 +274,9 @@ fn test_capacity_eviction_keeps_max_entries() { }; let db = open_db_with_config(&dir, config); - db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); - db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + db.add(vec![1.0, 0.0, 0.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); + db.add(vec![0.0, 0.0, 1.0], None).unwrap(); // After inserting 3 entries with max_entries=2, one should have been evicted. 
assert_eq!(db.stats().entries, 2, "max_entries=2 must evict oldest entry"); @@ -311,14 +311,14 @@ fn test_hnsw_recovery_sync() { let id_deleted: u64; { let db = open_db_with_config(&dir, config.clone()); - db.remember(vec![0.0, 1.0, 0.0], None).unwrap(); - id_deleted = db.remember(vec![0.0, 0.0, 1.0], None).unwrap(); + db.add(vec![0.0, 1.0, 0.0], None).unwrap(); + id_deleted = db.add(vec![0.0, 0.0, 1.0], None).unwrap(); // Checkpoint saves the HNSW index to disk right now (with these 2 items). db.checkpoint().unwrap(); // Insert a new item AFTER the checkpoint but BEFORE the crash - id_target = db.remember(vec![1.0, 0.0, 0.0], None).unwrap(); + id_target = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); // Delete an item that WAS saved in the HNSW on disk, AFTER the checkpoint db.delete_memory(id_deleted).unwrap(); diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py index 030fc63..8320342 100644 --- a/crates/cortexadb-py/cortexadb/client.py +++ b/crates/cortexadb-py/cortexadb/client.py @@ -126,7 +126,6 @@ def delete(self, mid: int) -> None: self._db.delete(mid) # Legacy Aliases - def remember(self, *a, **k): return self.add(*a, **k) def ask(self, *a, **k): return self.search(*a, **k) def ingest_document(self, *a, **k): return self.ingest(*a, **k) def delete_memory(self, mid: int): self.delete(mid) @@ -199,7 +198,7 @@ def replay(cls, log_path: str, db_path: str, **kwargs) -> "CortexaDB": report["op_counts"][op_type] = report["op_counts"].get(op_type, 0) + 1 try: - if op_type == "remember": + if op_type in ("add", "remember"): new_id = db.add( text=op.get("text"), vector=op.get("embedding"), @@ -240,9 +239,9 @@ def add(self, text=None, vector=None, metadata=None, collection=None, **kwargs) vector = vector or kwargs.get("vector") or kwargs.get("embedding") vec = self._resolve_embedding(text, vector) content = text or "" - mid = self._inner.remember_embedding(vec, metadata=metadata, collection=collection, content=content) + mid = 
self._inner.add_embedding(vec, metadata=metadata, collection=collection, content=content) if self._recorder: - self._recorder.record_remember(id=mid, text=content, embedding=vec, collection=collection, metadata=metadata) + self._recorder.record_add(id=mid, text=content, embedding=vec, collection=collection, metadata=metadata) return mid def search( @@ -323,7 +322,7 @@ def export_replay(self, path: str): try: mem = self.get(i) if mem.embedding: - writer.record_remember( + writer.record_add( id=mem.id, text=bytes(mem.content).decode("utf-8") if mem.content else "", embedding=mem.embedding, @@ -355,7 +354,7 @@ def add_batch(self, records: t.List[t.Dict]) -> t.List[int]: metadata=r.get("metadata") ) for r in records ] - return self._inner.remember_batch(facade_records) + return self._inner.add_batch(facade_records) def ingest(self, text: str, **kwargs) -> t.List[int]: """Ingest text with 100x speedup via batching.""" @@ -393,7 +392,6 @@ def __exit__(self, *a): return False # Legacy Aliases - def remember(self, *a, **k): return self.add(*a, **k) def ask(self, *a, **k): return self.search(*a, **k) def ingest_document(self, *a, **k): return self.ingest(*a, **k) def delete_memory(self, mid: int): self.delete(mid) diff --git a/crates/cortexadb-py/cortexadb/embedder.py b/crates/cortexadb-py/cortexadb/embedder.py index 811702c..2958575 100644 --- a/crates/cortexadb-py/cortexadb/embedder.py +++ b/crates/cortexadb-py/cortexadb/embedder.py @@ -9,7 +9,7 @@ from cortexadb.providers.openai import OpenAIEmbedder db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder(api_key="sk-...")) - db.remember("We chose Stripe for payments") # embeds automatically + db.add("We chose Stripe for payments") # embeds automatically hits = db.ask("payment provider?") # embeds query automatically """ @@ -89,7 +89,7 @@ class HashEmbedder(Embedder): from cortexadb import HashEmbedder db = CortexaDB.open("/tmp/test.mem", embedder=HashEmbedder(dimension=64)) - db.remember("hello world") + db.add("hello 
world") """ def __init__(self, dimension: int = 64) -> None: diff --git a/crates/cortexadb-py/cortexadb/providers/gemini.py b/crates/cortexadb-py/cortexadb/providers/gemini.py index ed1af18..13f4e5a 100644 --- a/crates/cortexadb-py/cortexadb/providers/gemini.py +++ b/crates/cortexadb-py/cortexadb/providers/gemini.py @@ -13,7 +13,7 @@ "agent.mem", embedder=GeminiEmbedder(api_key="AIza...", model="models/text-embedding-004"), ) - db.remember("We chose Stripe for payments") + db.add("We chose Stripe for payments") hits = db.ask("payment provider?") """ diff --git a/crates/cortexadb-py/cortexadb/providers/ollama.py b/crates/cortexadb-py/cortexadb/providers/ollama.py index 3c79fba..7887693 100644 --- a/crates/cortexadb-py/cortexadb/providers/ollama.py +++ b/crates/cortexadb-py/cortexadb/providers/ollama.py @@ -14,7 +14,7 @@ "agent.mem", embedder=OllamaEmbedder(model="nomic-embed-text"), ) - db.remember("We chose Stripe for payments") + db.add("We chose Stripe for payments") hits = db.ask("payment provider?") """ diff --git a/crates/cortexadb-py/cortexadb/providers/openai.py b/crates/cortexadb-py/cortexadb/providers/openai.py index b88b4f6..9ffd7f8 100644 --- a/crates/cortexadb-py/cortexadb/providers/openai.py +++ b/crates/cortexadb-py/cortexadb/providers/openai.py @@ -13,7 +13,7 @@ "agent.mem", embedder=OpenAIEmbedder(api_key="sk-...", model="text-embedding-3-small"), ) - db.remember("We chose Stripe for payments") + db.add("We chose Stripe for payments") hits = db.ask("payment provider?") """ diff --git a/crates/cortexadb-py/cortexadb/replay.py b/crates/cortexadb-py/cortexadb/replay.py index bafc061..282a29f 100644 --- a/crates/cortexadb-py/cortexadb/replay.py +++ b/crates/cortexadb-py/cortexadb/replay.py @@ -16,11 +16,11 @@ Lines 2..N — operation records (one JSON object per line): - {"op": "remember", "id": 1, "text": "...", "embedding": [...], "collection": "default", "metadata": null} + {"op": "add", "id": 1, "text": "...", "embedding": [...], "collection": 
"default", "metadata": null} {"op": "connect", "from_id": 1, "to_id": 2, "relation": "caused_by"} {"op": "compact"} -The ``id`` field in ``remember`` records is the *original* memory ID assigned +The ``id`` field in ``add`` records is the *original* memory ID assigned during recording. :class:`ReplayReader` builds an old→new ID mapping when replaying so that ``connect`` operations translate correctly. """ @@ -56,7 +56,7 @@ class ReplayWriter: Example:: writer = ReplayWriter("session.log", dimension=128, sync="strict") - writer.record_remember(id=1, text="hello", embedding=[...], collection="default") + writer.record_add(id=1, text="hello", embedding=[...], collection="default") writer.close() """ @@ -78,7 +78,7 @@ def __init__(self, path: str, dimension: int, sync: str = "strict") -> None: # Op recorders # ------------------------------------------------------------------ - def record_remember( + def record_add( self, *, id: int, @@ -87,9 +87,9 @@ def record_remember( collection: str, metadata: Optional[Dict[str, str]], ) -> None: - """Append a ``remember`` operation.""" + """Append an ``add`` operation.""" self._write({ - "op": "remember", + "op": "add", "id": id, "text": text, "embedding": embedding, @@ -171,7 +171,7 @@ class ReplayReader: reader = ReplayReader("session.log") print(reader.header) # ReplayHeader(...) 
for op in reader.operations(): - print(op) # {"op": "remember", ...} + print(op) # {"op": "add", ...} """ def __init__(self, path: str) -> None: diff --git a/crates/cortexadb-py/src/lib.rs b/crates/cortexadb-py/src/lib.rs index 549f586..781e197 100644 --- a/crates/cortexadb-py/src/lib.rs +++ b/crates/cortexadb-py/src/lib.rs @@ -287,7 +287,7 @@ impl PyStats { /// /// Example: /// >>> db = CortexaDB.open("/tmp/agent.mem", dimension=128) -/// >>> mid = db.remember_embedding([0.1] * 128) +/// >>> mid = db.add_embedding([0.1] * 128) /// >>> hits = db.ask_embedding([0.1] * 128, top_k=5) /// >>> print(hits[0].score) #[pyclass(name = "CortexaDB")] @@ -382,7 +382,7 @@ impl PyCortexaDB { text_signature = "(self, embedding, *, metadata=None, collection='default', content='')", signature = (embedding, *, metadata=None, collection="default".to_string(), content="".to_string()) )] - fn remember_embedding( + fn add_embedding( &self, py: Python<'_>, embedding: Vec, @@ -401,9 +401,9 @@ impl PyCortexaDB { let id = py .allow_threads(|| { if content.is_empty() { - self.inner.remember_in_collection(&collection, embedding, metadata) + self.inner.add_in_collection(&collection, embedding, metadata) } else { - self.inner.remember_with_content( + self.inner.add_with_content( &collection, content.into_bytes(), embedding, @@ -423,7 +423,7 @@ impl PyCortexaDB { /// Returns: /// int: The ID of the last command executed (for flushing/waiting). 
#[pyo3(text_signature = "(self, records)")] - fn remember_batch(&self, py: Python<'_>, records: Vec) -> PyResult> { + fn add_batch(&self, py: Python<'_>, records: Vec) -> PyResult> { for rec in &records { if let Some(emb) = &rec.embedding { if emb.len() != self.dimension { @@ -447,7 +447,7 @@ impl PyCortexaDB { .collect(); let ids = py - .allow_threads(|| self.inner.remember_batch(facade_records)) + .allow_threads(|| self.inner.add_batch(facade_records)) .map_err(|e| CortexaDBError::new_err(e.to_string()))?; Ok(ids) diff --git a/crates/cortexadb-py/test_smoke.py b/crates/cortexadb-py/test_smoke.py index 2b3c48e..7902fe7 100644 --- a/crates/cortexadb-py/test_smoke.py +++ b/crates/cortexadb-py/test_smoke.py @@ -19,7 +19,7 @@ def test_cortexadb_basic_flow(): db = CortexaDB.open(DB_PATH, dimension=3) # 2. Store memory - mid = db.remember("Hello world", embedding=[1.0, 0.0, 0.0]) + mid = db.add("Hello world", embedding=[1.0, 0.0, 0.0]) # 3. Ask hits = db.search("world", embedding=[1.0, 0.0, 0.0]) @@ -33,7 +33,7 @@ def test_cortexadb_basic_flow(): assert bytes(mem.content).decode("utf-8") == "Hello world" # 5. Connect - mid2 = db.remember("Goodbye", embedding=[0.0, 1.0, 0.0]) + mid2 = db.add("Goodbye", embedding=[0.0, 1.0, 0.0]) db.connect(mid, mid2, "related") # 6. 
Stats & Len @@ -53,8 +53,8 @@ def test_cortexadb_collections(): col_a = db.collection("agent_a") col_b = db.collection("agent_b") - id_a = col_a.remember("I am Agent A", embedding=[1.0, 0.0, 0.0]) - col_b.remember("I am Agent B", embedding=[0.0, 1.0, 0.0]) + id_a = col_a.add("I am Agent A", embedding=[1.0, 0.0, 0.0]) + col_b.add("I am Agent B", embedding=[0.0, 1.0, 0.0]) assert db.get(id_a).collection == "agent_a" @@ -73,15 +73,15 @@ def test_cortexadb_error_handling(): # Wrong dimension map with pytest.raises(CortexaDBError, match="embedding dimension mismatch"): - db.remember("Wrong dim", embedding=[1.0, 0.0]) + db.add("Wrong dim", embedding=[1.0, 0.0]) # Missing embedding required with pytest.raises(CortexaDBError, match="No embedder"): - db.remember("No embedding") + db.add("No embedding") # Wrong dimension on open — the mismatch check uses in-memory stats (entries > 0) # so no checkpoint is required. - mid = db.remember("Seed", embedding=[1.0, 0.0, 0.0]) + mid = db.add("Seed", embedding=[1.0, 0.0, 0.0]) with pytest.raises(CortexaDBError, match="(?i)dimension mismatch"): CortexaDB.open(DB_PATH, dimension=4) @@ -122,8 +122,8 @@ def test_hash_embedder_deterministic(): def test_open_with_embedder(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) - # remember without explicit embedding - mid = db.remember("Auto-embedded text") + # add without explicit embedding + mid = db.add("Auto-embedded text") assert mid > 0 hits = db.search("Auto-embedded text") assert len(hits) >= 1 @@ -135,10 +135,10 @@ def test_open_requires_one_of_dimension_or_embedder(): with pytest.raises(CortexaDBError, match="not both"): CortexaDB.open(DB_PATH, dimension=16, embedder=HashEmbedder(16)) -def test_remember_without_embedder_requires_embedding(): +def test_add_without_embedder_requires_embedding(): db = CortexaDB.open(DB_PATH, dimension=3) with pytest.raises(CortexaDBError, match="No embedder"): - db.remember("No embedding provided") + db.add("No embedding 
provided") def test_ingest_document(): emb = HashEmbedder(dimension=32) @@ -158,7 +158,7 @@ def test_collection_auto_embed(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) col = db.collection("agent_a") - mid = col.remember("I am agent A") + mid = col.add("I am agent A") assert db.get(mid).collection == "agent_a" hits = col.search("agent A") assert any(h.id == mid for h in hits) @@ -172,8 +172,8 @@ def test_collection_isolation(): col_a = db.collection("agent_a") col_b = db.collection("agent_b") - mid_a = col_a.remember("I am agent A, secret info") - mid_b = col_b.remember("I am agent B, different info") + mid_a = col_a.add("I am agent A, secret info") + mid_b = col_b.add("I am agent B, different info") hits_a = col_a.search("agent A", top_k=10) hits_b = col_b.search("agent B", top_k=10) @@ -192,9 +192,9 @@ def test_collection_search_param(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) - mid_a = db.remember("Agent A private", collection="agent_a") - mid_b = db.remember("Agent B private", collection="agent_b") - mid_s = db.remember("Shared knowledge", collection="shared") + mid_a = db.add("Agent A private", collection="agent_a") + mid_b = db.add("Agent B private", collection="agent_b") + mid_s = db.add("Shared knowledge", collection="shared") # Single collection via collections= param hits = db.search("knowledge", collections=["shared"]) @@ -209,9 +209,9 @@ def test_cross_collection_fan_out(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) - mid_a = db.remember("Agent A knowledge", collection="agent_a") - mid_s = db.remember("Shared knowledge", collection="shared") - db.remember("Agent B only", collection="agent_b") + mid_a = db.add("Agent A knowledge", collection="agent_a") + mid_s = db.add("Shared knowledge", collection="shared") + db.add("Agent B only", collection="agent_b") hits = db.search("knowledge", collections=["agent_a", "shared"], top_k=10) ids = {h.id for h in 
hits} @@ -226,9 +226,9 @@ def test_global_search_returns_all_collections(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) - mid_a = db.remember("Agent A fact", collection="agent_a") - mid_b = db.remember("Agent B fact", collection="agent_b") - mid_s = db.remember("Shared fact", collection="shared") + mid_a = db.add("Agent A fact", collection="agent_a") + mid_b = db.add("Agent B fact", collection="agent_b") + mid_s = db.add("Shared fact", collection="shared") hits = db.search("fact", top_k=10) ids = {h.id for h in hits} @@ -238,12 +238,12 @@ def test_global_search_returns_all_collections(): def test_readonly_collection(): - """A readonly collection should allow search() but reject remember().""" + """A readonly collection should allow search() but reject add().""" emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) # Write to shared normally. - mid = db.collection("shared").remember("Public knowledge") + mid = db.collection("shared").add("Public knowledge") # Read from a readonly view. ro = db.collection("shared", readonly=True) @@ -252,10 +252,10 @@ def test_readonly_collection(): # Writes must be rejected. 
with pytest.raises(CortexaDBError, match="read-only"): - ro.remember("Trying to write") + ro.add("Trying to write") with pytest.raises(CortexaDBError, match="read-only"): - ro.ingest_document("Document text") + ro.ingest("Document text") # Deterministic Replay import json @@ -281,8 +281,8 @@ def cleanup_replay(): def test_replay_recording_creates_ndjson(cleanup_replay): """Recording mode should produce a valid NDJSON file.""" with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db: - db.remember("First memory", embedding=[1.0, 0.0, 0.0]) - db.remember("Second memory", embedding=[0.0, 1.0, 0.0]) + db.add("First memory", embedding=[1.0, 0.0, 0.0]) + db.add("Second memory", embedding=[0.0, 1.0, 0.0]) assert os.path.exists(LOG_PATH) lines = open(LOG_PATH).read().strip().splitlines() @@ -295,7 +295,7 @@ def test_replay_recording_creates_ndjson(cleanup_replay): # 2 operation lines. ops = [json.loads(l) for l in lines[1:]] assert len(ops) == 2 - assert all(op["op"] == "remember" for op in ops) + assert all(op["op"] == "add" for op in ops) assert ops[0]["text"] == "First memory" assert len(ops[0]["embedding"]) == 3 @@ -303,8 +303,8 @@ def test_replay_recording_creates_ndjson(cleanup_replay): def test_replay_round_trip(cleanup_replay): """Replaying a log into a new DB should recreate the same memories.""" with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db: - mid1 = db.remember("Alpha", embedding=[1.0, 0.0, 0.0], collection="agent_a") - mid2 = db.remember("Beta", embedding=[0.0, 1.0, 0.0], collection="agent_b") + mid1 = db.add("Alpha", embedding=[1.0, 0.0, 0.0], collection="agent_a") + mid2 = db.add("Beta", embedding=[0.0, 1.0, 0.0], collection="agent_b") db2 = CortexaDB.replay(LOG_PATH, REPLAY_DB) @@ -319,8 +319,8 @@ def test_replay_round_trip(cleanup_replay): def test_replay_connect_id_mapping(cleanup_replay): """connect() IDs in the log should be translated to new IDs on replay.""" with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db: - 
a = db.remember("Node A", embedding=[1.0, 0.0, 0.0]) - b = db.remember("Node B", embedding=[0.0, 1.0, 0.0]) + a = db.add("Node A", embedding=[1.0, 0.0, 0.0]) + b = db.add("Node B", embedding=[0.0, 1.0, 0.0]) db.connect(a, b, "relates_to") db2 = CortexaDB.replay(LOG_PATH, REPLAY_DB) @@ -331,8 +331,8 @@ def test_replay_connect_id_mapping(cleanup_replay): def test_replay_collection_preserved(cleanup_replay): """Replay should preserve original collections.""" with CortexaDB.open(DB_PATH, dimension=3, record=LOG_PATH) as db: - db.remember("In A", embedding=[1.0, 0.0, 0.0], collection="agent_a") - db.remember("In B", embedding=[0.0, 1.0, 0.0], collection="agent_b") + db.add("In A", embedding=[1.0, 0.0, 0.0], collection="agent_a") + db.add("In B", embedding=[0.0, 1.0, 0.0], collection="agent_b") db2 = CortexaDB.replay(LOG_PATH, REPLAY_DB) @@ -355,7 +355,7 @@ def test_replay_invalid_log_raises(cleanup_replay): def test_replay_reader_header(): """ReplayReader should parse the header correctly.""" with CortexaDB.open(DB_PATH, dimension=4, record=LOG_PATH) as db: - db.remember("test", embedding=[1.0, 0.0, 0.0, 0.0]) + db.add("test", embedding=[1.0, 0.0, 0.0, 0.0]) reader = ReplayReader(LOG_PATH) assert reader.header.dimension == 4 @@ -364,7 +364,7 @@ def test_replay_reader_header(): ops = list(reader.operations()) assert len(ops) == 1 - assert ops[0]["op"] == "remember" + assert ops[0]["op"] == "add" # Cleanup os.remove(LOG_PATH) @@ -413,7 +413,7 @@ def test_replay_strict_unknown_op_raises(cleanup_replay): CortexaDB.replay(LOG_PATH, REPLAY_DB, strict=True) -def test_replay_non_strict_malformed_remember_skips(cleanup_replay): +def test_replay_non_strict_malformed_add_skips(cleanup_replay): with open(LOG_PATH, "w", encoding="utf-8") as f: f.write( json.dumps( @@ -427,20 +427,20 @@ def test_replay_non_strict_malformed_remember_skips(cleanup_replay): + "\n" ) # Missing required `embedding`. 
- f.write(json.dumps({"op": "remember", "text": "bad remember"}) + "\n") + f.write(json.dumps({"op": "add", "text": "bad add"}) + "\n") db = CortexaDB.replay(LOG_PATH, REPLAY_DB, strict=False) report = db.last_replay_report assert report is not None - assert report["op_counts"]["remember"] == 1 + assert report["op_counts"]["add"] == 1 assert report["skipped"] == 1 assert len(db) == 0 def test_export_replay_sets_report(cleanup_replay): db = CortexaDB.open(DB_PATH, dimension=3) - db.remember("One", embedding=[1.0, 0.0, 0.0]) - db.remember("Two", embedding=[0.0, 1.0, 0.0]) + db.add("One", embedding=[1.0, 0.0, 0.0]) + db.add("Two", embedding=[0.0, 1.0, 0.0]) db.export_replay(LOG_PATH) report = db.last_export_replay_report @@ -451,8 +451,8 @@ def test_export_replay_sets_report(cleanup_replay): def test_hybrid_use_graph(): import cortexadb db = cortexadb.CortexaDB.open(DB_PATH, dimension=2, sync="strict") - id1 = db.remember("Node A", embedding=[1.0, 0.0]) - id2 = db.remember("Node B", embedding=[0.0, 1.0]) # Orthogonal + id1 = db.add("Node A", embedding=[1.0, 0.0]) + id2 = db.add("Node B", embedding=[0.0, 1.0]) # Orthogonal db.connect(id1, id2, "links_to") # Vector only: expects id1 with high score, id2 with score ~0 @@ -474,8 +474,8 @@ def test_hybrid_use_graph_respects_collections(monkeypatch): import cortexadb db = cortexadb.CortexaDB.open(DB_PATH, dimension=2, sync="strict") - id_a = db.remember("Node A", embedding=[1.0, 0.0], collection="agent_a") - id_b = db.remember("Node B", embedding=[0.0, 1.0], collection="agent_b") + id_a = db.add("Node A", embedding=[1.0, 0.0], collection="agent_a") + id_b = db.add("Node B", embedding=[0.0, 1.0], collection="agent_b") def fake_get_neighbors(_mid): # Simulate an unexpected backend neighbor response across collections. 
@@ -497,7 +497,7 @@ def fake_get_neighbors(_mid): def test_hybrid_recency_bias(): import cortexadb db = cortexadb.CortexaDB.open(DB_PATH, dimension=2, sync="strict") - id1 = db.remember("Node A", embedding=[1.0, 0.0]) + id1 = db.add("Node A", embedding=[1.0, 0.0]) hits_normal = db.search("test", embedding=[1.0, 0.0], top_k=1) hits_recent = db.search("test", embedding=[1.0, 0.0], top_k=1, recency_bias=True) @@ -513,7 +513,7 @@ def test_capacity_max_entries(tmp_path): # Insert 8 memories. for i in range(8): - db.remember(f"Content {i}", embedding=[1.0, 0.0]) + db.add(f"Content {i}", embedding=[1.0, 0.0]) # Should evict the oldest 3. stats = db.stats() diff --git a/crates/cortexadb-py/test_stress.py b/crates/cortexadb-py/test_stress.py index 761cc67..1ffdb27 100644 --- a/crates/cortexadb-py/test_stress.py +++ b/crates/cortexadb-py/test_stress.py @@ -20,7 +20,7 @@ def test_replay_safety(clean_db_path): with CortexaDB.open(clean_db_path, dimension=2, sync="strict") as db: start_time = time.time() for i in range(5000): - db.remember(f"Entry {i}", embedding=[0.5, 0.5]) + db.add(f"Entry {i}", embedding=[0.5, 0.5]) print(f"Inserted 5,000 memories in {time.time() - start_time:.2f}s") assert len(db) == 5000 @@ -36,7 +36,7 @@ def test_compaction_integrity(clean_db_path): with CortexaDB.open(clean_db_path, dimension=2, sync="strict") as db: for _ in range(100): - db.remember("Stress entry", embedding=[0.1, 0.9]) + db.add("Stress entry", embedding=[0.1, 0.9]) assert len(db) == 100 @@ -58,7 +58,7 @@ def test_concurrent_compaction(clean_db_path): # but compact_segments also filters by deletion ratio. # Let's insert enough to have some churn. 
for i in range(1000): - db.remember(f"Entry {i}", embedding=[0.5, 0.5]) + db.add(f"Entry {i}", embedding=[0.5, 0.5]) assert len(db) == 1000 diff --git a/docs/api/python.md b/docs/api/python.md index feb199e..af1db51 100644 --- a/docs/api/python.md +++ b/docs/api/python.md @@ -69,7 +69,7 @@ report = db.last_replay_report ## Memory Operations -### `.remember(text, embedding=None, metadata=None)` +### `.add(text, embedding=None, metadata=None)` Stores a new memory entry. If an embedder is configured and no embedding is provided, the text is auto-embedded. @@ -85,9 +85,9 @@ Stores a new memory entry. If an embedder is configured and no embedding is prov **Example:** ```python -mid = db.remember("User prefers dark mode") -mid = db.remember("text", metadata={"source": "onboarding"}) -mid = db.remember("text", embedding=[0.1, 0.2, ...]) +mid = db.add("User prefers dark mode") +mid = db.add("text", metadata={"source": "onboarding"}) +mid = db.add("text", embedding=[0.1, 0.2, ...]) ``` --- diff --git a/docs/api/rust.md b/docs/api/rust.md index 7247155..d73e779 100644 --- a/docs/api/rust.md +++ b/docs/api/rust.md @@ -22,29 +22,29 @@ let db = CortexaDB::builder("/path/to/db", config).build()?; ### Memory Operations -#### `remember(embedding, metadata) -> Result` +#### `add(embedding, metadata) -> Result` Stores a memory in the default collection. ```rust -let id = db.remember(vec![0.1; 128], None)?; -let id = db.remember(vec![0.1; 128], Some(metadata_map))?; +let id = db.add(vec![0.1; 128], None)?; +let id = db.add(vec![0.1; 128], Some(metadata_map))?; ``` -#### `remember_in_collection(collection, embedding, metadata) -> Result` +#### `add_in_collection(collection, embedding, metadata) -> Result` Stores a memory in a specific collection. 
```rust -let id = db.remember_in_collection("agent_a", vec![0.1; 128], None)?; +let id = db.add_in_collection("agent_a", vec![0.1; 128], None)?; ``` -#### `remember_with_content(collection, content, embedding, metadata) -> Result` +#### `add_with_content(collection, content, embedding, metadata) -> Result` Stores a memory with raw content bytes. ```rust -let id = db.remember_with_content( +let id = db.add_with_content( "default", b"Hello world".to_vec(), vec![0.1; 128], diff --git a/docs/content/docs/api/rust.mdx b/docs/content/docs/api/rust.mdx index 2ab163a..92dc8c1 100644 --- a/docs/content/docs/api/rust.mdx +++ b/docs/content/docs/api/rust.mdx @@ -27,29 +27,29 @@ let db = CortexaDBBuilder::new("/path/to/db", 128) ### Memory Operations -#### `remember(embedding, metadata) -> Result` +#### `add(embedding, metadata) -> Result` Stores a memory in the default collection. ```rust -let id = db.remember(vec![0.1; 128], None)?; -let id = db.remember(vec![0.1; 128], Some(metadata_map))?; +let id = db.add(vec![0.1; 128], None)?; +let id = db.add(vec![0.1; 128], Some(metadata_map))?; ``` -#### `remember_in_collection(collection, embedding, metadata) -> Result` +#### `add_in_collection(collection, embedding, metadata) -> Result` Stores a memory in a specific collection. ```rust -let id = db.remember_in_collection("agent_a", vec![0.1; 128], None)?; +let id = db.add_in_collection("agent_a", vec![0.1; 128], None)?; ``` -#### `remember_with_content(collection, content, embedding, metadata) -> Result` +#### `add_with_content(collection, content, embedding, metadata) -> Result` Stores a memory with raw content bytes. 
```rust -let id = db.remember_with_content( +let id = db.add_with_content( "default", b"Hello world".to_vec(), vec![0.1; 128], diff --git a/docs/content/docs/getting-started/quickstart.mdx b/docs/content/docs/getting-started/quickstart.mdx index 0bee787..1fbe1a0 100644 --- a/docs/content/docs/getting-started/quickstart.mdx +++ b/docs/content/docs/getting-started/quickstart.mdx @@ -97,7 +97,7 @@ fn main() -> Result<(), Box> { // Store a memory with an embedding let embedding = vec![0.1; 128]; - let id = db.remember(embedding.clone(), None)?; + let id = db.add(embedding.clone(), None)?; // Query let hits = db.ask(embedding, 5, None)?; @@ -106,7 +106,7 @@ fn main() -> Result<(), Box> { } // Connect memories - let id2 = db.remember(vec![0.2; 128], None)?; + let id2 = db.add(vec![0.2; 128], None)?; db.connect(id, id2, "related_to")?; // Checkpoint for fast recovery diff --git a/docs/content/docs/guides/replay.mdx b/docs/content/docs/guides/replay.mdx index 26db678..e09ae5e 100644 --- a/docs/content/docs/guides/replay.mdx +++ b/docs/content/docs/guides/replay.mdx @@ -43,8 +43,8 @@ The log file is NDJSON with a header line followed by operation lines: **Operations (lines 2+):** ```json -{"op": "remember", "id": 1, "text": "User likes dark mode", "embedding": [...], "collection": "default", "metadata": null} -{"op": "remember", "id": 2, "text": "User works at Stripe", "embedding": [...], "collection": "default", "metadata": null} +{"op": "add", "id": 1, "text": "User likes dark mode", "embedding": [...], "collection": "default", "metadata": null} +{"op": "add", "id": 2, "text": "User works at Stripe", "embedding": [...], "collection": "default", "metadata": null} {"op": "connect", "from_id": 1, "to_id": 2, "relation": "relates_to"} {"op": "delete", "id": 1} {"op": "compact"} diff --git a/docs/content/docs/resources/examples.mdx b/docs/content/docs/resources/examples.mdx index c59f6b2..40d9390 100644 --- a/docs/content/docs/resources/examples.mdx +++ 
b/docs/content/docs/resources/examples.mdx @@ -218,11 +218,11 @@ fn main() -> Result<(), Box> { // Store memories let emb = vec![0.1_f32; 128]; - let id1 = db.remember(emb.clone(), None)?; + let id1 = db.add(emb.clone(), None)?; let mut meta = HashMap::new(); meta.insert("source".into(), "test".into()); - let id2 = db.remember(vec![0.2; 128], Some(meta))?; + let id2 = db.add(vec![0.2; 128], Some(meta))?; // Query let hits = db.ask(emb, 5, None)?; diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 728efc3..54556b5 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -27,11 +27,11 @@ db = CortexaDB.open("agent.mem", dimension=128) ```python # Auto-embedding (requires embedder) -mid1 = db.remember("The user prefers dark mode.") -mid2 = db.remember("User works at Stripe.") +mid1 = db.add("The user prefers dark mode.") +mid2 = db.add("User works at Stripe.") # With metadata -mid3 = db.remember("User's name is Alice.", metadata={"source": "onboarding"}) +mid3 = db.add("User's name is Alice.", metadata={"source": "onboarding"}) ``` ### 4. 
Query Memories @@ -69,7 +69,7 @@ db.ingest("Long article text here...", strategy="markdown") ```python agent_a = db.collection("agent_a") -agent_a.remember("Agent A's private memory") +agent_a.add("Agent A's private memory") hits = agent_a.ask("query only agent A's memories") ``` @@ -94,7 +94,7 @@ fn main() -> Result<(), Box> { // Store a memory with an embedding let embedding = vec![0.1; 128]; - let id = db.remember(embedding.clone(), None)?; + let id = db.add(embedding.clone(), None)?; // Query let hits = db.ask(embedding, 5, None)?; @@ -103,7 +103,7 @@ fn main() -> Result<(), Box> { } // Connect memories - let id2 = db.remember(vec![0.2; 128], None)?; + let id2 = db.add(vec![0.2; 128], None)?; db.connect(id, id2, "related_to")?; // Checkpoint for fast recovery diff --git a/docs/guides/core-concepts.md b/docs/guides/core-concepts.md index 8423ce7..750fe9f 100644 --- a/docs/guides/core-concepts.md +++ b/docs/guides/core-concepts.md @@ -24,7 +24,7 @@ The database is built around three pillars: │ ┌────────────────────────▼─────────────────────────┐ │ CortexaDB Facade │ -│ High-level API (remember, ask, etc.) │ +│ High-level API (add, ask, etc.) │ └────────────────────────┬─────────────────────────┘ │ ┌────────────────────────▼─────────────────────────┐ @@ -62,7 +62,7 @@ The database is built around three pillars: ### Facade -The `CortexaDB` facade is the primary entry point. It provides the high-level API (`remember`, `ask`, `connect`, etc.) and delegates to the store for durability and concurrency. +The `CortexaDB` facade is the primary entry point. It provides the high-level API (`add`, `ask`, `connect`, etc.) and delegates to the store for durability and concurrency. ### Store diff --git a/docs/guides/embedders.md b/docs/guides/embedders.md index d345b5c..66c05d6 100644 --- a/docs/guides/embedders.md +++ b/docs/guides/embedders.md @@ -9,11 +9,11 @@ Without an embedder, you must provide raw embedding vectors manually. 
With an em ```python # Without embedder - manual vectors db = CortexaDB.open("db.mem", dimension=128) -db.remember("text", embedding=[0.1, 0.2, ...]) # must provide embedding +db.add("text", embedding=[0.1, 0.2, ...]) # must provide embedding # With embedder - automatic db = CortexaDB.open("db.mem", embedder=OpenAIEmbedder()) -db.remember("text") # auto-embedded +db.add("text") # auto-embedded db.ask("query") # auto-embedded ``` @@ -117,7 +117,7 @@ class MyEmbedder(Embedder): When an embedder is configured: -1. **`remember(text)`** - Text is embedded via `embedder.embed(text)`, then stored with the embedding +1. **`add(text)`** - Text is embedded via `embedder.embed(text)`, then stored with the embedding 2. **`ask(query)`** - Query is embedded via `embedder.embed(query)`, then used for vector search 3. **`ingest(text)`** - Each chunk is embedded individually after chunking 4. **`load(file)`** - File is read, chunked, and each chunk is embedded @@ -125,7 +125,7 @@ When an embedder is configured: You can always override auto-embedding by providing an explicit `embedding` parameter: ```python -db.remember("text", embedding=[0.1, 0.2, ...]) # uses provided embedding +db.add("text", embedding=[0.1, 0.2, ...]) # uses provided embedding ``` --- diff --git a/docs/guides/replay.md b/docs/guides/replay.md index 1a4ce54..61a24e1 100644 --- a/docs/guides/replay.md +++ b/docs/guides/replay.md @@ -4,7 +4,7 @@ CortexaDB can record every operation to a log file and replay it to recreate an ## Overview -Recording captures all write operations (remember, connect, delete, compact, checkpoint) as NDJSON (newline-delimited JSON). Replay reads the log and re-applies each operation to build a new database. +Recording captures all write operations (add, connect, delete, compact, checkpoint) as NDJSON (newline-delimited JSON). Replay reads the log and re-applies each operation to build a new database. 
--- @@ -16,8 +16,8 @@ Enable recording by passing a `record` path when opening the database: db = CortexaDB.open("agent.mem", dimension=128, record="session.log") # All operations are now logged -mid1 = db.remember("User likes dark mode", embedding=[...]) -mid2 = db.remember("User works at Stripe", embedding=[...]) +mid1 = db.add("User likes dark mode", embedding=[...]) +mid2 = db.add("User works at Stripe", embedding=[...]) db.connect(mid1, mid2, "relates_to") db.delete_memory(mid1) db.compact() @@ -40,8 +40,8 @@ The log file is NDJSON with a header line followed by operation lines: **Operations (lines 2+):** ```json -{"op": "remember", "id": 1, "text": "User likes dark mode", "embedding": [...], "collection": "default", "metadata": null} -{"op": "remember", "id": 2, "text": "User works at Stripe", "embedding": [...], "collection": "default", "metadata": null} +{"op": "add", "id": 1, "text": "User likes dark mode", "embedding": [...], "collection": "default", "metadata": null} +{"op": "add", "id": 2, "text": "User works at Stripe", "embedding": [...], "collection": "default", "metadata": null} {"op": "connect", "from_id": 1, "to_id": 2, "relation": "relates_to"} {"op": "delete", "id": 1} {"op": "compact"} @@ -91,7 +91,7 @@ print(report["total_ops"]) # Total operations in the log print(report["applied"]) # Successfully applied print(report["skipped"]) # Skipped (malformed but non-fatal) print(report["failed"]) # Failed (execution error, non-fatal) -print(report["op_counts"]) # Per-type counts: {"remember": 5, "connect": 2, ...} +print(report["op_counts"]) # Per-type counts: {"add": 5, "connect": 2, ...} print(report["failures"]) # List of up to 50 failure details ``` diff --git a/docs/index.md b/docs/index.md index a813e06..c1bc598 100644 --- a/docs/index.md +++ b/docs/index.md @@ -54,8 +54,8 @@ from cortexadb.providers.openai import OpenAIEmbedder db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder()) -db.remember("The user prefers dark mode.") 
-db.remember("User works at Stripe.") +db.add("The user prefers dark mode.") +db.add("User works at Stripe.") hits = db.ask("What does the user like?") for hit in hits: diff --git a/docs/resources/examples.md b/docs/resources/examples.md index ccaac8c..0a2cd39 100644 --- a/docs/resources/examples.md +++ b/docs/resources/examples.md @@ -11,9 +11,9 @@ from cortexadb.providers.openai import OpenAIEmbedder db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder()) # Store memories -mid1 = db.remember("The user prefers dark mode.") -mid2 = db.remember("User works at Stripe.") -mid3 = db.remember("User's favorite language is Python.") +mid1 = db.add("The user prefers dark mode.") +mid2 = db.add("User works at Stripe.") +mid3 = db.add("User's favorite language is Python.") # Search hits = db.ask("What programming language does the user like?") @@ -30,9 +30,9 @@ for hit in hits: db = CortexaDB.open("knowledge.mem", embedder=embedder) # Store entities -alice = db.remember("Alice is a software engineer at Acme Corp") -bob = db.remember("Bob is Alice's manager") -acme = db.remember("Acme Corp builds developer tools") +alice = db.add("Alice is a software engineer at Acme Corp") +bob = db.add("Bob is Alice's manager") +acme = db.add("Acme Corp builds developer tools") # Create relationships db.connect(alice, bob, "reports_to") @@ -56,10 +56,10 @@ researcher = db.collection("researcher") writer = db.collection("writer") # Agents store memories independently -planner.remember("Task: Write a blog post about vector databases") -researcher.remember("Found: CortexaDB supports HNSW indexing") -researcher.remember("Found: Typical recall is 95% with HNSW") -writer.remember("Draft intro: Vector databases are transforming AI...") +planner.add("Task: Write a blog post about vector databases") +researcher.add("Found: CortexaDB supports HNSW indexing") +researcher.add("Found: Typical recall is 95% with HNSW") +writer.add("Draft intro: Vector databases are transforming AI...") # Each agent 
queries only its own memories research = researcher.ask("What did I find about indexing?") @@ -105,8 +105,8 @@ for hit in hits: ```python # Record a session db = CortexaDB.open("agent.mem", embedder=embedder, record="session.log") -db.remember("User asked about pricing") -db.remember("Showed enterprise plan") +db.add("User asked about pricing") +db.add("Showed enterprise plan") db.connect(1, 2, "led_to") # Later: replay the session for debugging @@ -162,7 +162,7 @@ db = CortexaDB.open( # Old, low-importance memories are automatically evicted for i in range(20000): - db.remember(f"Memory #{i}") + db.add(f"Memory #{i}") stats = db.stats() print(f"Entries: {stats.entries}") # ~10000 (eviction kicked in) @@ -176,9 +176,9 @@ print(f"Entries: {stats.entries}") # ~10000 (eviction kicked in) db = CortexaDB.open("agent.mem", embedder=embedder) # Store with metadata -db.remember("Dark mode enabled", metadata={"category": "preference"}) -db.remember("Meeting at 3pm", metadata={"category": "schedule"}) -db.remember("Likes Python", metadata={"category": "preference"}) +db.add("Dark mode enabled", metadata={"category": "preference"}) +db.add("Meeting at 3pm", metadata={"category": "schedule"}) +db.add("Likes Python", metadata={"category": "preference"}) # Filter by metadata (if supported by your query) hits = db.ask("What are the user's preferences?") @@ -216,11 +216,11 @@ fn main() -> Result<(), Box> { // Store memories let emb = vec![0.1_f32; 128]; - let id1 = db.remember(emb.clone(), None)?; + let id1 = db.add(emb.clone(), None)?; let mut meta = HashMap::new(); meta.insert("source".into(), "test".into()); - let id2 = db.remember(vec![0.2; 128], Some(meta))?; + let id2 = db.add(vec![0.2; 128], Some(meta))?; // Query let hits = db.ask(emb, 5, None)?; diff --git a/examples/rust/basic_usage.rs b/examples/rust/basic_usage.rs index 75521b7..50a2d31 100644 --- a/examples/rust/basic_usage.rs +++ b/examples/rust/basic_usage.rs @@ -148,8 +148,8 @@ Content under heading 3. 
]; // Bulk insert with 100x speedup - let last_id = db.remember_batch(records)?; - println!(" Batch finished. Last inserted ID: {}", last_id); + let last_id = db.add_batch(records)?; + println!(" Batch finished. Last inserted ID: {}", last_id.last().unwrap()); // For manual IDs in the example, we'll use 1, 2, 3 assuming clean start let id1 = 1; let id2 = 2; let id3 = 3; @@ -181,7 +181,7 @@ Content under heading 3. // ----------------------------------------------------------- println!("\n[7] Collections..."); let travel_text = "Flight to Tokyo booked for June."; - let col_id = db.remember_with_content( + let col_id = db.add_with_content( "travel_agent", travel_text.as_bytes().to_vec(), embed_text(travel_text, dimension), From b7213fa234cc2bc20005b5000549579a3a2a3682 Mon Sep 17 00:00:00 2001 From: anaslimem Date: Mon, 9 Mar 2026 05:19:24 +0100 Subject: [PATCH 3/6] Renamed ask to search --- crates/cortexadb-core/src/facade.rs | 28 +++++++++---------- crates/cortexadb-core/tests/integration.rs | 16 +++++------ crates/cortexadb-py/cortexadb/client.py | 7 ++--- crates/cortexadb-py/cortexadb/embedder.py | 2 +- crates/cortexadb-py/src/lib.rs | 12 ++++---- docs/api/python.md | 8 +++--- docs/api/rust.md | 8 +++--- docs/content/docs/api/rust.mdx | 8 +++--- .../docs/getting-started/quickstart.mdx | 2 +- docs/content/docs/resources/examples.mdx | 4 +-- docs/getting-started/quickstart.md | 6 ++-- docs/guides/collections.md | 2 +- docs/guides/core-concepts.md | 4 +-- docs/guides/embedders.md | 4 +-- docs/guides/query-engine.md | 16 +++++------ docs/index.md | 2 +- docs/resources/examples.md | 16 +++++------ examples/rust/basic_usage.rs | 4 +-- 18 files changed, 74 insertions(+), 75 deletions(-) diff --git a/crates/cortexadb-core/src/facade.rs b/crates/cortexadb-core/src/facade.rs index 2477fb6..b47a408 100644 --- a/crates/cortexadb-core/src/facade.rs +++ b/crates/cortexadb-core/src/facade.rs @@ -2,7 +2,7 @@ //! //! 
This is the recommended entry point for using CortexaDB as a library. //! It wraps [`CortexaDBStore`] and hides planner/engine/index details behind -//! five core operations: `open`, `add`, `ask`, `connect`, `compact`. +//! five core operations: `open`, `add`, `search`, `connect`, `compact`. use std::collections::HashMap; use std::path::PathBuf; @@ -175,7 +175,7 @@ impl QueryEmbedder for StaticEmbedder { /// .build()?; /// /// let id = db.add(vec![1.0, 0.0, 0.0], None)?; -/// let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None)?; +/// let hits = db.search(vec![1.0, 0.0, 0.0], 5, None)?; /// # Ok(()) /// # } /// ``` @@ -353,7 +353,7 @@ impl CortexaDB { /// # Errors /// /// Returns [`CortexaDBError`] if the query execution fails. - pub fn ask( + pub fn search( &self, query_embedding: Vec, top_k: usize, @@ -384,7 +384,7 @@ impl CortexaDB { /// Over-fetches by 4× top_k globally, then filters by collection and /// returns the top *top_k* results. This avoids a separate index per /// collection while keeping the filter inside Rust (no GIL round-trips). 
- pub fn ask_in_collection( + pub fn search_in_collection( &self, collection: &str, query_embedding: Vec, @@ -516,7 +516,7 @@ mod tests { use tempfile::TempDir; #[test] - fn test_open_add_ask() { + fn test_open_add_search() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); @@ -525,7 +525,7 @@ mod tests { let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); assert_ne!(id1, id2); - let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); + let hits = db.search(vec![1.0, 0.0, 0.0], 5, None).unwrap(); assert!(!hits.is_empty()); assert_eq!(hits[0].id, id1); } @@ -562,7 +562,7 @@ mod tests { let stats = db.stats(); assert_eq!(stats.entries, 2); - let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); + let hits = db.search(vec![1.0, 0.0, 0.0], 5, None).unwrap(); assert!(!hits.is_empty()); } @@ -615,7 +615,7 @@ mod tests { meta.insert("source".to_string(), "test".to_string()); let id = db.add(vec![1.0, 0.0, 0.0], Some(meta)).unwrap(); - let hits = db.ask(vec![1.0, 0.0, 0.0], 1, None).unwrap(); + let hits = db.search(vec![1.0, 0.0, 0.0], 1, None).unwrap(); assert_eq!(hits[0].id, id); let memory = db.get_memory(id).unwrap(); @@ -664,7 +664,7 @@ mod tests { db.delete_memory(id).unwrap(); - let hits = db.ask(vec![1.0, 0.0, 0.0], 10, None).unwrap(); + let hits = db.search(vec![1.0, 0.0, 0.0], 10, None).unwrap(); assert!( hits.iter().all(|h| h.id != id), "deleted memory must not appear in search results" @@ -730,7 +730,7 @@ mod tests { let id_a = db.add_in_collection("ns_a", vec![1.0, 0.0, 0.0], None).unwrap(); let _id_b = db.add_in_collection("ns_b", vec![1.0, 0.0, 0.0], None).unwrap(); - let hits = db.ask_in_collection("ns_a", vec![1.0, 0.0, 0.0], 10, None).unwrap(); + let hits = db.search_in_collection("ns_a", vec![1.0, 0.0, 0.0], 10, None).unwrap(); assert!(!hits.is_empty(), "should find memories in ns_a"); assert!( hits.iter().all(|h| h.id == id_a), @@ -777,7 +777,7 @@ mod tests { 
let id_b = db.add_in_collection("ns_sparse", vec![0.9, 0.1, 0.0], None).unwrap(); // Ask for top-2 in ns_sparse — both must be returned. - let hits = db.ask_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], 2, None).unwrap(); + let hits = db.search_in_collection("ns_sparse", vec![1.0, 0.0, 0.0], 2, None).unwrap(); let hit_ids: Vec = hits.iter().map(|h| h.id).collect(); assert!( hit_ids.contains(&id_a), @@ -794,13 +794,13 @@ mod tests { // ----- Intent anchors end-to-end ----- #[test] - fn test_ask_without_intent_anchors_unchanged() { + fn test_search_without_intent_anchors_unchanged() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); db.add(vec![1.0, 0.0, 0.0], None).unwrap(); - // Default QueryOptions has intent_anchors = None; must produce same results as ask(). - let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); + // Default QueryOptions has intent_anchors = None; must produce same results as search(). + let hits = db.search(vec![1.0, 0.0, 0.0], 5, None).unwrap(); assert!(!hits.is_empty()); } } diff --git a/crates/cortexadb-core/tests/integration.rs b/crates/cortexadb-core/tests/integration.rs index 5f2bc98..f2c4c84 100644 --- a/crates/cortexadb-core/tests/integration.rs +++ b/crates/cortexadb-core/tests/integration.rs @@ -1,6 +1,6 @@ //! Integration tests for CortexaDB. //! -//! These tests exercise the full stack: open → add → ask → checkpoint → recover. +//! These tests exercise the full stack: open → add → search → checkpoint → recover. //! Unlike the unit tests in `src/`, these tests run against actual disk files (via tempdir). 
use cortexadb_core::{CortexaDB, CortexaDBConfig}; @@ -24,11 +24,11 @@ fn open_db_with_config(dir: &TempDir, config: CortexaDBConfig) -> CortexaDB { } // --------------------------------------------------------------------------- -// Basic open → add → ask → recover +// Basic open → add → search → recover // --------------------------------------------------------------------------- #[test] -fn test_full_open_add_ask() { +fn test_full_open_add_search() { let dir = TempDir::new().unwrap(); let path = dir.path().join("db"); let db = open_db(&path); @@ -36,11 +36,11 @@ fn test_full_open_add_ask() { let id1 = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); let id2 = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); - let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); - assert!(!hits.is_empty(), "ask should return results"); + let hits = db.search(vec![1.0, 0.0, 0.0], 5, None).unwrap(); + assert!(!hits.is_empty(), "search should return results"); assert_eq!(hits[0].id, id1, "top hit should be id1 (exact match)"); - let hits2 = db.ask(vec![0.0, 1.0, 0.0], 5, None).unwrap(); + let hits2 = db.search(vec![0.0, 1.0, 0.0], 5, None).unwrap(); assert_eq!(hits2[0].id, id2, "top hit for second query should be id2"); } @@ -83,7 +83,7 @@ fn test_recover_search_returns_correct_top_hit() { } let db = open_db(&path); - let hits = db.ask(vec![1.0, 0.0, 0.0], 1, None).unwrap(); + let hits = db.search(vec![1.0, 0.0, 0.0], 1, None).unwrap(); assert_eq!(hits.len(), 1); assert_eq!(hits[0].id, id_target, "top hit after recovery must be the matching entry"); } @@ -336,7 +336,7 @@ fn test_hnsw_recovery_sync() { assert!(db.get_memory(id_target).is_ok(), "uncheckpointed entry must survive"); // Perform an HNSW search to ensure the vector index was properly synced during recovery - let hits = db.ask(vec![1.0, 0.0, 0.0], 5, None).unwrap(); + let hits = db.search(vec![1.0, 0.0, 0.0], 5, None).unwrap(); assert!(!hits.is_empty()); assert_eq!(hits[0].id, id_target, "top hit should be the post-checkpoint 
entry"); } diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py index 8320342..a57289a 100644 --- a/crates/cortexadb-py/cortexadb/client.py +++ b/crates/cortexadb-py/cortexadb/client.py @@ -258,14 +258,14 @@ def search( vec = self._resolve_embedding(query, vector) if collections is None: - base_hits = self._inner.ask_embedding(vec, top_k=limit, filter=filter) + base_hits = self._inner.search_embedding(vec, top_k=limit, filter=filter) elif len(collections) == 1: - base_hits = self._inner.ask_in_collection(collections[0], vec, top_k=limit, filter=filter) + base_hits = self._inner.search_in_collection(collections[0], vec, top_k=limit, filter=filter) else: seen_ids = set() base_hits = [] for ns in collections: - for hit in self._inner.ask_in_collection(ns, vec, top_k=limit, filter=filter): + for hit in self._inner.search_in_collection(ns, vec, top_k=limit, filter=filter): if hit.id not in seen_ids: seen_ids.add(hit.id) base_hits.append(hit) @@ -392,6 +392,5 @@ def __exit__(self, *a): return False # Legacy Aliases - def ask(self, *a, **k): return self.search(*a, **k) def ingest_document(self, *a, **k): return self.ingest(*a, **k) def delete_memory(self, mid: int): self.delete(mid) diff --git a/crates/cortexadb-py/cortexadb/embedder.py b/crates/cortexadb-py/cortexadb/embedder.py index 2958575..cf92c5f 100644 --- a/crates/cortexadb-py/cortexadb/embedder.py +++ b/crates/cortexadb-py/cortexadb/embedder.py @@ -10,7 +10,7 @@ db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder(api_key="sk-...")) db.add("We chose Stripe for payments") # embeds automatically - hits = db.ask("payment provider?") # embeds query automatically + hits = db.search("payment provider?") # embeds query automatically """ from __future__ import annotations diff --git a/crates/cortexadb-py/src/lib.rs b/crates/cortexadb-py/src/lib.rs index 781e197..68eb1b6 100644 --- a/crates/cortexadb-py/src/lib.rs +++ b/crates/cortexadb-py/src/lib.rs @@ -154,7 +154,7 @@ impl 
PyBatchRecord { // Hit — lightweight query result // --------------------------------------------------------------------------- -/// A scored query hit. Returned by `CortexaDB.ask_embedding()`. +/// A scored query hit. Returned by `CortexaDB.search_embedding()`. /// /// Attributes: /// id (int): Memory identifier. @@ -288,7 +288,7 @@ impl PyStats { /// Example: /// >>> db = CortexaDB.open("/tmp/agent.mem", dimension=128) /// >>> mid = db.add_embedding([0.1] * 128) -/// >>> hits = db.ask_embedding([0.1] * 128, top_k=5) +/// >>> hits = db.search_embedding([0.1] * 128, top_k=5) /// >>> print(hits[0].score) #[pyclass(name = "CortexaDB")] struct PyCortexaDB { @@ -468,7 +468,7 @@ impl PyCortexaDB { text_signature = "(self, embedding, *, top_k=5, filter=None)", signature = (embedding, *, top_k=5, filter=None) )] - fn ask_embedding( + fn search_embedding( &self, py: Python<'_>, embedding: Vec, @@ -484,7 +484,7 @@ impl PyCortexaDB { } let results = py - .allow_threads(|| self.inner.ask(embedding, top_k, filter)) + .allow_threads(|| self.inner.search(embedding, top_k, filter)) .map_err(map_cortexadb_err)?; Ok(results.into_iter().map(|m| PyHit { id: m.id, score: m.score }).collect()) } @@ -505,7 +505,7 @@ impl PyCortexaDB { text_signature = "(self, collection, embedding, *, top_k=5, filter=None)", signature = (collection, embedding, *, top_k=5, filter=None) )] - fn ask_in_collection( + fn search_in_collection( &self, py: Python<'_>, collection: &str, @@ -522,7 +522,7 @@ impl PyCortexaDB { } let results = py - .allow_threads(|| self.inner.ask_in_collection(collection, embedding, top_k, filter)) + .allow_threads(|| self.inner.search_in_collection(collection, embedding, top_k, filter)) .map_err(map_cortexadb_err)?; Ok(results.into_iter().map(|m| m.into()).collect::>()) diff --git a/docs/api/python.md b/docs/api/python.md index af1db51..5e2fa16 100644 --- a/docs/api/python.md +++ b/docs/api/python.md @@ -92,7 +92,7 @@ mid = db.add("text", embedding=[0.1, 0.2, ...]) --- -### 
`.ask(query, embedding=None, top_k=5, use_graph=False, recency_bias=False)` +### `.search(query, embedding=None, top_k=5, use_graph=False, recency_bias=False)` Performs a hybrid search across the database. @@ -110,8 +110,8 @@ Performs a hybrid search across the database. **Example:** ```python -hits = db.ask("What does the user prefer?") -hits = db.ask("query", top_k=10, use_graph=True, recency_bias=True) +hits = db.search("What does the user prefer?") +hits = db.search("query", top_k=10, use_graph=True, recency_bias=True) for hit in hits: print(f"ID: {hit.id}, Score: {hit.score:.3f}") @@ -368,7 +368,7 @@ Exports the current database state as a replay log. ### `Hit` -Query result from `.ask()`. +Query result from `.search()`. | Field | Type | Description | |-------|------|-------------| diff --git a/docs/api/rust.md b/docs/api/rust.md index d73e779..80a4ceb 100644 --- a/docs/api/rust.md +++ b/docs/api/rust.md @@ -52,23 +52,23 @@ let id = db.add_with_content( )?; ``` -#### `ask(embedding, top_k, metadata_filter) -> Result>` +#### `search(embedding, top_k, metadata_filter) -> Result>` Vector similarity search in the default collection. ```rust -let hits = db.ask(vec![0.1; 128], 5, None)?; +let hits = db.search(vec![0.1; 128], 5, None)?; for hit in &hits { println!("ID: {}, Score: {:.3}", hit.id, hit.score); } ``` -#### `ask_in_collection(collection, embedding, top_k, filter) -> Result>` +#### `search_in_collection(collection, embedding, top_k, filter) -> Result>` Collection-scoped search. 
```rust -let hits = db.ask_in_collection("agent_a", vec![0.1; 128], 5, None)?; +let hits = db.search_in_collection("agent_a", vec![0.1; 128], 5, None)?; ``` #### `get_memory(id) -> Result` diff --git a/docs/content/docs/api/rust.mdx b/docs/content/docs/api/rust.mdx index 92dc8c1..6cbc51e 100644 --- a/docs/content/docs/api/rust.mdx +++ b/docs/content/docs/api/rust.mdx @@ -57,23 +57,23 @@ let id = db.add_with_content( )?; ``` -#### `ask(embedding, top_k, metadata_filter) -> Result>` +#### `search(embedding, top_k, metadata_filter) -> Result>` Vector similarity search in the default collection. ```rust -let hits = db.ask(vec![0.1; 128], 5, None)?; +let hits = db.search(vec![0.1; 128], 5, None)?; for hit in &hits { println!("ID: {}, Score: {:.3}", hit.id, hit.score); } ``` -#### `ask_in_collection(collection, embedding, top_k, filter) -> Result>` +#### `search_in_collection(collection, embedding, top_k, filter) -> Result>` Collection-scoped search. ```rust -let hits = db.ask_in_collection("agent_a", vec![0.1; 128], 5, None)?; +let hits = db.search_in_collection("agent_a", vec![0.1; 128], 5, None)?; ``` #### `get_memory(id) -> Result` diff --git a/docs/content/docs/getting-started/quickstart.mdx b/docs/content/docs/getting-started/quickstart.mdx index 1fbe1a0..6c259b9 100644 --- a/docs/content/docs/getting-started/quickstart.mdx +++ b/docs/content/docs/getting-started/quickstart.mdx @@ -100,7 +100,7 @@ fn main() -> Result<(), Box> { let id = db.add(embedding.clone(), None)?; // Query - let hits = db.ask(embedding, 5, None)?; + let hits = db.search(embedding, 5, None)?; for hit in &hits { println!("ID: {}, Score: {:.3}", hit.id, hit.score); } diff --git a/docs/content/docs/resources/examples.mdx b/docs/content/docs/resources/examples.mdx index 40d9390..c405c46 100644 --- a/docs/content/docs/resources/examples.mdx +++ b/docs/content/docs/resources/examples.mdx @@ -107,7 +107,7 @@ for hit in hits: ```python # Record a session db = CortexaDB.open("agent.mem", 
embedder=embedder, record="session.log") -db.add("User asked about pricing") +db.add("User asked about pricing") db.add("Showed enterprise plan") db.connect(1, 2, "led_to") @@ -225,7 +225,7 @@ fn main() -> Result<(), Box> { let id2 = db.add(vec![0.2; 128], Some(meta))?; // Query - let hits = db.ask(emb, 5, None)?; + let hits = db.search(emb, 5, None)?; println!("Found {} results", hits.len()); // Graph diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 54556b5..e40cb69 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -38,7 +38,7 @@ mid3 = db.add("User's name is Alice.", metadata={"source": "onboarding"}) ```python # Semantic search -hits = db.ask("What does the user like?") +hits = db.search("What does the user like?") for hit in hits: print(f"ID: {hit.id}, Score: {hit.score:.3f}") @@ -70,7 +70,7 @@ db.ingest("Long article text here...", strategy="markdown") ```python agent_a = db.collection("agent_a") agent_a.add("Agent A's private memory") -hits = agent_a.ask("query only agent A's memories") +hits = agent_a.search("query only agent A's memories") ``` --- @@ -97,7 +97,7 @@ fn main() -> Result<(), Box> { let id = db.add(embedding.clone(), None)?; // Query - let hits = db.ask(embedding, 5, None)?; + let hits = db.search(embedding, 5, None)?; for hit in &hits { println!("ID: {}, Score: {:.3}", hit.id, hit.score); } diff --git a/docs/guides/collections.md b/docs/guides/collections.md index 457729e..cdc4597 100644 --- a/docs/guides/collections.md +++ b/docs/guides/collections.md @@ -120,7 +120,7 @@ researcher.add("Found 3 relevant papers on AI agents") writer.add("Draft: AI agents are transforming...") # Each agent queries only its own memories -planner_context = planner.search("What tasks are pending?") +planner_context = planner.search("What tasks are pending?") ``` ### Shared Knowledge Base diff --git a/docs/guides/core-concepts.md b/docs/guides/core-concepts.md index
750fe9f..72f7977 100644 --- a/docs/guides/core-concepts.md +++ b/docs/guides/core-concepts.md @@ -24,7 +24,7 @@ The database is built around three pillars: │ ┌────────────────────────▼─────────────────────────┐ │ CortexaDB Facade │ -│ High-level API (add, ask, etc.) │ +│ High-level API (add, search, etc.) │ └────────────────────────┬─────────────────────────┘ │ ┌────────────────────────▼─────────────────────────┐ @@ -62,7 +62,7 @@ The database is built around three pillars: ### Facade -The `CortexaDB` facade is the primary entry point. It provides the high-level API (`add`, `ask`, `connect`, etc.) and delegates to the store for durability and concurrency. +The `CortexaDB` facade is the primary entry point. It provides the high-level API (`add`, `search`, `connect`, etc.) and delegates to the store for durability and concurrency. ### Store diff --git a/docs/guides/embedders.md b/docs/guides/embedders.md index 66c05d6..e407b7a 100644 --- a/docs/guides/embedders.md +++ b/docs/guides/embedders.md @@ -14,7 +14,7 @@ db.add("text", embedding=[0.1, 0.2, ...]) # must provide embedding # With embedder - automatic db = CortexaDB.open("db.mem", embedder=OpenAIEmbedder()) db.add("text") # auto-embedded -db.ask("query") # auto-embedded +db.search("query") # auto-embedded ``` --- @@ -118,7 +118,7 @@ class MyEmbedder(Embedder): When an embedder is configured: 1. **`add(text)`** - Text is embedded via `embedder.embed(text)`, then stored with the embedding -2. **`ask(query)`** - Query is embedded via `embedder.embed(query)`, then used for vector search +2. **`search(query)`** - Query is embedded via `embedder.embed(query)`, then used for vector search 3. **`ingest(text)`** - Each chunk is embedded individually after chunking 4. 
**`load(file)`** - File is read, chunked, and each chunk is embedded diff --git a/docs/guides/query-engine.md b/docs/guides/query-engine.md index e601c4b..5eb6ee5 100644 --- a/docs/guides/query-engine.md +++ b/docs/guides/query-engine.md @@ -97,7 +97,7 @@ When `use_graph=True`, the query engine expands results using BFS traversal: 5. Return final top-k ```python -hits = db.ask("query", use_graph=True) +hits = db.search("query", use_graph=True) ``` Graph expansion only follows edges within the same collection. @@ -120,7 +120,7 @@ When recency is part of the scoring weights: - Combined with other signals via weighted sum ```python -hits = db.ask("query", recency_bias=True) +hits = db.search("query", recency_bias=True) ``` --- @@ -149,7 +149,7 @@ Results can be filtered by metadata key-value pairs: ```python # Only return memories with source="onboarding" -hits = db.ask("query", metadata_filter={"source": "onboarding"}) +hits = db.search("query", metadata_filter={"source": "onboarding"}) ``` Metadata filtering is applied after vector search but before final scoring. @@ -185,19 +185,19 @@ This is primarily used for advanced multi-signal retrieval pipelines. 
```python # Basic vector search -hits = db.ask("What does the user prefer?") +hits = db.search("What does the user prefer?") # With graph expansion -hits = db.ask("query", use_graph=True) +hits = db.search("query", use_graph=True) # With recency bias -hits = db.ask("query", recency_bias=True) +hits = db.search("query", recency_bias=True) # Custom top_k -hits = db.ask("query", top_k=10) +hits = db.search("query", top_k=10) # Combined -hits = db.ask("query", top_k=10, use_graph=True, recency_bias=True) +hits = db.search("query", top_k=10, use_graph=True, recency_bias=True) ``` --- diff --git a/docs/index.md b/docs/index.md index c1bc598..646e038 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,7 +57,7 @@ db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder()) db.add("The user prefers dark mode.") db.add("User works at Stripe.") -hits = db.ask("What does the user like?") +hits = db.search("What does the user like?") for hit in hits: print(f"ID: {hit.id}, Score: {hit.score}") ``` diff --git a/docs/resources/examples.md b/docs/resources/examples.md index 0a2cd39..f7ec0b6 100644 --- a/docs/resources/examples.md +++ b/docs/resources/examples.md @@ -16,7 +16,7 @@ mid2 = db.add("User works at Stripe.") mid3 = db.add("User's favorite language is Python.") # Search -hits = db.ask("What programming language does the user like?") +hits = db.search("What programming language does the user like?") for hit in hits: mem = db.get_memory(hit.id) print(f"[{hit.score:.3f}] {mem.content.decode()}") @@ -40,7 +40,7 @@ db.connect(alice, acme, "works_at") db.connect(bob, acme, "works_at") # Query with graph expansion -hits = db.ask("Who works at Acme?", use_graph=True) +hits = db.search("Who works at Acme?", use_graph=True) ``` --- @@ -62,7 +62,7 @@ researcher.add("Found: Typical recall is 95% with HNSW") writer.add("Draft intro: Vector databases are transforming AI...") # Each agent queries only its own memories -research = researcher.ask("What did I find about indexing?") 
+research = researcher.search("What did I find about indexing?") # Admin writes to shared collection shared = db.collection("shared") @@ -70,7 +70,7 @@ shared.add("Company policy: All code must be reviewed") # Agents read from shared collection (read-only) agent_view = db.collection("shared", readonly=True) -guidelines = agent_view.ask("What is the writing style?") +guidelines = agent_view.search("What is the writing style?") ``` --- @@ -92,7 +92,7 @@ Long article about machine learning... ids = db.ingest(article, strategy="semantic", chunk_size=2048) # Query across all ingested documents -hits = db.ask("How do I configure the API?", top_k=10) +hits = db.search("How do I configure the API?", top_k=10) for hit in hits: mem = db.get_memory(hit.id) print(f"[{hit.score:.3f}] {mem.content.decode()[:100]}...") @@ -105,7 +105,7 @@ for hit in hits: ```python # Record a session db = CortexaDB.open("agent.mem", embedder=embedder, record="session.log") -db.add("User asked about pricing") +db.add("User asked about pricing") db.add("Showed enterprise plan") db.connect(1, 2, "led_to") @@ -181,7 +181,7 @@ db.add("Meeting at 3pm", metadata={"category": "schedule"}) db.add("Likes Python", metadata={"category": "preference"}) # Filter by metadata (if supported by your query) -hits = db.ask("What are the user's preferences?") +hits = db.search("What are the user's preferences?") for hit in hits: mem = db.get_memory(hit.id) print(f"{mem.content.decode()} [{mem.metadata}]") @@ -223,7 +223,7 @@ fn main() -> Result<(), Box> { let id2 = db.add(vec![0.2; 128], Some(meta))?; // Query - let hits = db.ask(emb, 5, None)?; + let hits = db.search(emb, 5, None)?; println!("Found {} results", hits.len()); // Graph diff --git a/examples/rust/basic_usage.rs b/examples/rust/basic_usage.rs index 50a2d31..56e92df 100644 --- a/examples/rust/basic_usage.rs +++ b/examples/rust/basic_usage.rs @@ -169,7 +169,7 @@ Content under heading 3.
// ----------------------------------------------------------- println!("\n[6] Querying memories..."); let query = "Where does the user live?"; - let hits = db.ask(embed_text(query, dimension), 3, None)?; + let hits = db.search(embed_text(query, dimension), 3, None)?; for hit in hits { let mem = db.get_memory(hit.id)?; let content = String::from_utf8_lossy(&mem.content); @@ -188,7 +188,7 @@ Content under heading 3. None, )?; println!(" Stored in collection 'travel_agent': ID {}", col_id); - let col_hits = db.ask_in_collection( + let col_hits = db.search_in_collection( "travel_agent", embed_text("Tokyo travel plans", dimension), 5, From 4443e56c054e60023b67d9db29208fc6bb47b212 Mon Sep 17 00:00:00 2001 From: anaslimem Date: Mon, 9 Mar 2026 06:07:30 +0100 Subject: [PATCH 4/6] refactored other functions --- crates/cortexadb-core/src/facade.rs | 12 ++++++------ crates/cortexadb-core/tests/integration.rs | 6 +++--- crates/cortexadb-py/cortexadb/client.py | 11 ++++------- crates/cortexadb-py/src/lib.rs | 4 ++-- crates/cortexadb-py/test_smoke.py | 6 +++--- docs/api/python.md | 6 +++--- docs/api/rust.md | 4 ++-- docs/content/docs/api/rust.mdx | 4 ++-- docs/guides/replay.md | 2 +- 9 files changed, 26 insertions(+), 29 deletions(-) diff --git a/crates/cortexadb-core/src/facade.rs b/crates/cortexadb-core/src/facade.rs index b47a408..89c5333 100644 --- a/crates/cortexadb-core/src/facade.rs +++ b/crates/cortexadb-core/src/facade.rs @@ -439,7 +439,7 @@ impl CortexaDB { /// # Errors /// /// Returns [`CortexaDBError`] if the deletion cannot be logged. 
- pub fn delete_memory(&self, id: u64) -> Result<()> { + pub fn delete(&self, id: u64) -> Result<()> { self.inner.delete_memory(MemoryId(id))?; Ok(()) } @@ -639,7 +639,7 @@ mod tests { } #[test] - fn test_delete_memory_removes_from_stats() { + fn test_delete_removes_from_stats() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); @@ -647,12 +647,12 @@ mod tests { let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); assert_eq!(db.stats().entries, 1); - db.delete_memory(id).unwrap(); + db.delete(id).unwrap(); assert_eq!(db.stats().entries, 0, "entry count should be 0 after delete"); } #[test] - fn test_delete_memory_not_returned_in_ask() { + fn test_delete_not_returned_in_ask() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); @@ -662,7 +662,7 @@ mod tests { // (ask() returns NoEmbeddings when the vector index is completely empty.) 
let _id_keep = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); - db.delete_memory(id).unwrap(); + db.delete(id).unwrap(); let hits = db.search(vec![1.0, 0.0, 0.0], 10, None).unwrap(); assert!( @@ -678,7 +678,7 @@ mod tests { let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); - db.delete_memory(id).unwrap(); + db.delete(id).unwrap(); let result = db.get_memory(id); assert!(result.is_err(), "get_memory on a deleted ID must return an error"); diff --git a/crates/cortexadb-core/tests/integration.rs b/crates/cortexadb-core/tests/integration.rs index f2c4c84..b612137 100644 --- a/crates/cortexadb-core/tests/integration.rs +++ b/crates/cortexadb-core/tests/integration.rs @@ -153,7 +153,7 @@ fn test_delete_persists_across_recovery() { let db = open_db(&path); deleted_id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); kept_id = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); - db.delete_memory(deleted_id).unwrap(); + db.delete(deleted_id).unwrap(); assert_eq!(db.stats().entries, 1); } @@ -174,7 +174,7 @@ fn test_delete_then_checkpoint_recovery() { let db = open_db(&path); deleted_id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); db.add(vec![0.0, 1.0, 0.0], None).unwrap(); - db.delete_memory(deleted_id).unwrap(); + db.delete(deleted_id).unwrap(); db.flush().unwrap(); // ensure WAL is synced before checkpoint db.checkpoint().unwrap(); } @@ -321,7 +321,7 @@ fn test_hnsw_recovery_sync() { id_target = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); // Delete an item that WAS saved in the HNSW on disk, AFTER the checkpoint - db.delete_memory(id_deleted).unwrap(); + db.delete(id_deleted).unwrap(); // The process crashes/drops here. HNSW index on disk is STALE. 
} diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py index a57289a..0283523 100644 --- a/crates/cortexadb-py/cortexadb/client.py +++ b/crates/cortexadb-py/cortexadb/client.py @@ -126,9 +126,7 @@ def delete(self, mid: int) -> None: self._db.delete(mid) # Legacy Aliases - def ask(self, *a, **k): return self.search(*a, **k) - def ingest_document(self, *a, **k): return self.ingest(*a, **k) - def delete_memory(self, mid: int): self.delete(mid) + # All removed. def __repr__(self) -> str: return f"Collection(name={self.name!r}, mode={'readonly' if self._readonly else 'readwrite'})" @@ -359,7 +357,7 @@ def add_batch(self, records: t.List[t.Dict]) -> t.List[int]: def ingest(self, text: str, **kwargs) -> t.List[int]: """Ingest text with 100x speedup via batching.""" if not self._embedder: - raise CortexaDBConfigError("ingest_document requires an embedder.") + raise CortexaDBConfigError("ingest requires an embedder.") chunks = chunk(text, **kwargs) if not chunks: return [] @@ -379,7 +377,7 @@ def _resolve_embedding(self, text, supplied): return self._embedder.embed(text) def get(self, mid: int) -> Memory: return self._inner.get(mid) - def delete(self, mid: int): self._inner.delete_memory(mid) + def delete(self, mid: int): self._inner.delete(mid) def compact(self): self._inner.compact() def checkpoint(self): self._inner.checkpoint() def stats(self): return self._inner.stats() @@ -392,5 +390,4 @@ def __exit__(self, *a): return False # Legacy Aliases - def ingest_document(self, *a, **k): return self.ingest(*a, **k) - def delete_memory(self, mid: int): self.delete(mid) + # All removed. diff --git a/crates/cortexadb-py/src/lib.rs b/crates/cortexadb-py/src/lib.rs index 68eb1b6..bab911e 100644 --- a/crates/cortexadb-py/src/lib.rs +++ b/crates/cortexadb-py/src/lib.rs @@ -561,8 +561,8 @@ impl PyCortexaDB { /// Raises: /// CortexaDBError: If the memory ID does not exist or deletion fails. 
#[pyo3(text_signature = "(self, mid)")] - fn delete_memory(&self, py: Python<'_>, mid: u64) -> PyResult<()> { - py.allow_threads(|| self.inner.delete_memory(mid)).map_err(map_cortexadb_err) + fn delete(&self, py: Python<'_>, mid: u64) -> PyResult<()> { + py.allow_threads(|| self.inner.delete(mid)).map_err(map_cortexadb_err) } /// Create an edge between two memories. diff --git a/crates/cortexadb-py/test_smoke.py b/crates/cortexadb-py/test_smoke.py index 7902fe7..1edadcb 100644 --- a/crates/cortexadb-py/test_smoke.py +++ b/crates/cortexadb-py/test_smoke.py @@ -140,7 +140,7 @@ def test_add_without_embedder_requires_embedding(): with pytest.raises(CortexaDBError, match="No embedder"): db.add("No embedding provided") -def test_ingest_document(): +def test_ingest(): emb = HashEmbedder(dimension=32) db = CortexaDB.open(DB_PATH, embedder=emb) long_text = ("The quick brown fox jumps over the lazy dog. " * 30).strip() @@ -149,9 +149,9 @@ def test_ingest_document(): assert len(set(ids)) == len(ids) # all IDs unique assert db.stats().entries == len(ids) -def test_ingest_document_requires_embedder(): +def test_ingest_requires_embedder(): db = CortexaDB.open(DB_PATH, dimension=16) - with pytest.raises(CortexaDBError, match="ingest_document"): + with pytest.raises(CortexaDBError, match="ingest"): db.ingest("some text") def test_collection_auto_embed(): diff --git a/docs/api/python.md b/docs/api/python.md index 5e2fa16..508b248 100644 --- a/docs/api/python.md +++ b/docs/api/python.md @@ -147,7 +147,7 @@ print(mem.embedding) # [0.1, 0.2, ...] or None --- -### `.delete_memory(mid)` +### `.delete(mid)` Permanently deletes a memory and updates all indexes. @@ -161,7 +161,7 @@ Permanently deletes a memory and updates all indexes. 
**Example:** ```python -db.delete_memory(42) +db.delete(42) ``` --- @@ -257,7 +257,7 @@ db.load("paper.pdf", strategy="recursive", chunk_size=1024) --- -### `.ingest_document(text, chunk_size=512, overlap=50, metadata=None, collection=None)` +### `.ingest(text, chunk_size=512, overlap=50, metadata=None, collection=None)` Legacy method for chunking and storing text. Uses fixed chunking. diff --git a/docs/api/rust.md b/docs/api/rust.md index 80a4ceb..233d3cd 100644 --- a/docs/api/rust.md +++ b/docs/api/rust.md @@ -80,12 +80,12 @@ let mem = db.get_memory(42)?; println!("{:?}", mem.metadata); ``` -#### `delete_memory(id) -> Result<()>` +#### `delete(id) -> Result<()>` Deletes a memory and updates all indexes. ```rust -db.delete_memory(42)?; +db.delete(42)?; ``` --- diff --git a/docs/content/docs/api/rust.mdx b/docs/content/docs/api/rust.mdx index 6cbc51e..c95a3e0 100644 --- a/docs/content/docs/api/rust.mdx +++ b/docs/content/docs/api/rust.mdx @@ -85,12 +85,12 @@ let mem = db.get_memory(42)?; println!("{:?}", mem.metadata); ``` -#### `delete_memory(id) -> Result<()>` +#### `delete(id) -> Result<()>` Deletes a memory and updates all indexes. 
```rust -db.delete_memory(42)?; +db.delete(42)?; ``` --- diff --git a/docs/guides/replay.md b/docs/guides/replay.md index 61a24e1..dea9d73 100644 --- a/docs/guides/replay.md +++ b/docs/guides/replay.md @@ -19,7 +19,7 @@ db = CortexaDB.open("agent.mem", dimension=128, record="session.log") mid1 = db.add("User likes dark mode", embedding=[...]) mid2 = db.add("User works at Stripe", embedding=[...]) db.connect(mid1, mid2, "relates_to") -db.delete_memory(mid1) +db.delete(mid1) db.compact() db.checkpoint() ``` From 47f967f140c93c165103d44b424c2b9b65dcc6f7 Mon Sep 17 00:00:00 2001 From: anaslimem Date: Mon, 9 Mar 2026 06:08:01 +0100 Subject: [PATCH 5/6] Removed changelog file --- CHANGELOG.md | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 CHANGELOG.md diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index 315d84e..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,36 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), -and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). - -## [0.1.8] - 2026-03-08 - -### Added -- **100x Faster Ingestion**: Implemented Rust-level batching in `remember_batch` and Python `ingest()`. -- **Modernized API**: New `Collection` abstraction and unified `add`/`search` methods. -- **Fluent Query Builder**: Chainable interface for complex discovery queries. - -### Fixed -- **CI Regression Fixes**: Resolved keyword argument collisions and restored missing core methods. -- **Graph Isolation**: Ensured graph-based discovery respects collection boundaries. - -## [0.1.7] - 2026-03-07 - -### Added -- Initial batching support (pre-release). - -## [0.1.0] - 2026-02-25 - -### Added -- Initial release of **CortexaDB**. -- Core Rust engine with Log-Structured Merge patterns. -- Write-Ahead Log (WAL) for crash safety. 
-- Python bindings via PyO3 and maturain. -- Vector semantic search and Graph relationship support. -- Temporal query boosting. -- Multi-agent collections. - -### Fixed -- Python 3.14 build compatibility in CI. From 4ab27f9175c48c53dd51eb7bdee37aafdd72398c Mon Sep 17 00:00:00 2001 From: anaslimem Date: Mon, 9 Mar 2026 06:17:25 +0100 Subject: [PATCH 6/6] Big refactor --- benchmark/cortexadb_runner.py | 8 ++++---- crates/cortexadb-core/src/core/command.rs | 10 +++++----- crates/cortexadb-core/src/core/state_machine.rs | 10 +++++----- crates/cortexadb-core/src/engine.rs | 12 ++++++------ crates/cortexadb-core/src/facade.rs | 14 +++++++------- crates/cortexadb-core/src/storage/wal.rs | 4 ++-- crates/cortexadb-core/src/store.rs | 16 ++++++++-------- crates/cortexadb-py/cortexadb/client.py | 2 +- .../cortexadb-py/cortexadb/providers/gemini.py | 2 +- .../cortexadb-py/cortexadb/providers/ollama.py | 2 +- .../cortexadb-py/cortexadb/providers/openai.py | 2 +- 11 files changed, 41 insertions(+), 41 deletions(-) diff --git a/benchmark/cortexadb_runner.py b/benchmark/cortexadb_runner.py index 658bdc4..a0aa44c 100644 --- a/benchmark/cortexadb_runner.py +++ b/benchmark/cortexadb_runner.py @@ -83,7 +83,7 @@ def run_benchmark( db = CortexaDB.open(db_path, dimension=len(embeddings[0])) for i, emb in enumerate(embeddings): - db.remember(f"memory_{i}", embedding=emb) + db.add(f"memory_{i}", embedding=emb) # Force checkpoint to flush db.checkpoint() @@ -108,7 +108,7 @@ def run_benchmark( # === WARMUP === print(f"Warming up with {warmup_queries} queries...") for i in range(warmup_queries): - _ = db._inner.ask_embedding( + _ = db._inner.search_embedding( embedding=queries[i % len(queries)], top_k=top_k ) @@ -120,7 +120,7 @@ def run_benchmark( query = queries[i % len(queries)] start = time.perf_counter() - hits = db._inner.ask_embedding(embedding=query, top_k=top_k) + hits = db._inner.search_embedding(embedding=query, top_k=top_k) elapsed = (time.perf_counter() - start) * 1000 # ms 
latencies.append(elapsed) @@ -151,7 +151,7 @@ def run_benchmark( exact_ids = exact_search(embeddings, query, top_k) # Get HNSW results - hits = db._inner.ask_embedding(embedding=query, top_k=top_k) + hits = db._inner.search_embedding(embedding=query, top_k=top_k) hnsw_ids = [hit.id for hit in hits] # Calculate recall diff --git a/crates/cortexadb-core/src/core/command.rs b/crates/cortexadb-core/src/core/command.rs index bf12354..13453b9 100644 --- a/crates/cortexadb-core/src/core/command.rs +++ b/crates/cortexadb-core/src/core/command.rs @@ -8,7 +8,7 @@ pub enum Command { /// Insert or update a memory entry InsertMemory(MemoryEntry), /// Delete a memory entry by ID - DeleteMemory(MemoryId), + Delete(MemoryId), /// Add an edge between two memories with a relation type AddEdge { from: MemoryId, to: MemoryId, relation: String }, /// Remove an edge between two memories @@ -20,8 +20,8 @@ impl Command { Command::InsertMemory(entry) } - pub fn delete_memory(id: MemoryId) -> Self { - Command::DeleteMemory(id) + pub fn delete(id: MemoryId) -> Self { + Command::Delete(id) } pub fn add_edge(from: MemoryId, to: MemoryId, relation: String) -> Self { @@ -49,9 +49,9 @@ mod tests { #[test] fn test_delete_command() { - let cmd = Command::delete_memory(MemoryId(1)); + let cmd = Command::delete(MemoryId(1)); match cmd { - Command::DeleteMemory(id) => assert_eq!(id, MemoryId(1)), + Command::Delete(id) => assert_eq!(id, MemoryId(1)), _ => panic!("Expected DeleteMemory"), } } diff --git a/crates/cortexadb-core/src/core/state_machine.rs b/crates/cortexadb-core/src/core/state_machine.rs index 1424929..9b965ab 100644 --- a/crates/cortexadb-core/src/core/state_machine.rs +++ b/crates/cortexadb-core/src/core/state_machine.rs @@ -44,7 +44,7 @@ impl StateMachine { pub fn apply_command(&mut self, cmd: Command) -> Result<()> { match cmd { Command::InsertMemory(entry) => self.insert_memory(entry), - Command::DeleteMemory(id) => self.delete_memory(id), + Command::Delete(id) => self.delete(id), 
Command::AddEdge { from, to, relation } => self.add_edge(from, to, relation), Command::RemoveEdge { from, to } => self.remove_edge(from, to), } @@ -83,7 +83,7 @@ impl StateMachine { } /// Delete a memory entry and its edges - pub fn delete_memory(&mut self, id: MemoryId) -> Result<()> { + pub fn delete(&mut self, id: MemoryId) -> Result<()> { if !self.memories.contains_key(&id) { return Err(StateMachineError::MemoryNotFound(id)); } @@ -238,13 +238,13 @@ mod tests { } #[test] - fn test_delete_memory() { + fn test_delete() { let mut sm = StateMachine::new(); let entry = create_test_entry(1, "default", 1000); sm.insert_memory(entry).unwrap(); assert_eq!(sm.len(), 1); - sm.delete_memory(MemoryId(1)).unwrap(); + sm.delete(MemoryId(1)).unwrap(); assert_eq!(sm.len(), 0); assert!(sm.get_memory(MemoryId(1)).is_err()); } @@ -365,7 +365,7 @@ mod tests { sm.add_edge(MemoryId(1), MemoryId(2), "refers".to_string()).unwrap(); // Delete memory 2 - sm.delete_memory(MemoryId(2)).unwrap(); + sm.delete(MemoryId(2)).unwrap(); // Edge should be cleaned up let neighbors = sm.get_neighbors(MemoryId(1)).unwrap(); diff --git a/crates/cortexadb-core/src/engine.rs b/crates/cortexadb-core/src/engine.rs index 645b4d5..af9f917 100644 --- a/crates/cortexadb-core/src/engine.rs +++ b/crates/cortexadb-core/src/engine.rs @@ -161,7 +161,7 @@ impl Engine { repaired_segments = true; } } - Command::DeleteMemory(id) => { + Command::Delete(id) => { // Delete may refer to a missing segment entry in crash scenarios. 
let _ = segments.delete_entry(*id); } @@ -264,7 +264,7 @@ impl Engine { // Write entry to segment storage self._write_entry_to_segments(entry)?; } - Command::DeleteMemory(id) => { + Command::Delete(id) => { // Mark as deleted in segments self.segments.delete_entry(*id)?; } @@ -374,9 +374,9 @@ impl Engine { } if sync_immediately { - self.execute_command(Command::DeleteMemory(id))?; + self.execute_command(Command::delete(id))?; } else { - self.execute_command_unsynced(Command::DeleteMemory(id))?; + self.execute_command_unsynced(Command::delete(id))?; } evicted_ids.push(id); } @@ -658,7 +658,7 @@ mod tests { engine.execute_command(Command::InsertMemory(entry)).unwrap(); } for id in [0_u64, 1, 2, 3] { - engine.execute_command(Command::DeleteMemory(MemoryId(id))).unwrap(); + engine.execute_command(Command::delete(MemoryId(id))).unwrap(); } let report = engine.compact_segments().unwrap(); @@ -885,7 +885,7 @@ mod tests { } // Delete one memory - engine.execute_command(Command::DeleteMemory(MemoryId(2))).unwrap(); + engine.execute_command(Command::delete(MemoryId(2))).unwrap(); assert_eq!(engine.get_state_machine().len(), 4); // 5 - 1 } diff --git a/crates/cortexadb-core/src/facade.rs b/crates/cortexadb-core/src/facade.rs index 89c5333..e04f79d 100644 --- a/crates/cortexadb-core/src/facade.rs +++ b/crates/cortexadb-core/src/facade.rs @@ -19,7 +19,7 @@ use crate::store::{CheckpointPolicy, CortexaDBStore, CortexaDBStoreError}; // Public types // --------------------------------------------------------------------------- -/// Returned by [`CortexaDB::ask`] — a scored memory hit. +/// Returned by [`CortexaDB::search`] — a scored memory hit. 
#[derive(Debug, Clone)] pub struct Hit { pub id: u64, @@ -138,7 +138,7 @@ pub enum CortexaDBError { pub type Result = std::result::Result; // --------------------------------------------------------------------------- -// Embedder adapter (used internally for `ask`) +// Embedder adapter (used internally for `search`) // --------------------------------------------------------------------------- struct StaticEmbedder { @@ -440,7 +440,7 @@ impl CortexaDB { /// /// Returns [`CortexaDBError`] if the deletion cannot be logged. pub fn delete(&self, id: u64) -> Result<()> { - self.inner.delete_memory(MemoryId(id))?; + self.inner.delete(MemoryId(id))?; Ok(()) } @@ -652,14 +652,14 @@ mod tests { } #[test] - fn test_delete_not_returned_in_ask() { + fn test_delete_not_returned_in_search() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); let id = db.add(vec![1.0, 0.0, 0.0], None).unwrap(); // Keep a second entry so the index is non-empty after deletion. - // (ask() returns NoEmbeddings when the vector index is completely empty.) + // (search() returns NoEmbeddings when the vector index is completely empty.) 
let _id_keep = db.add(vec![0.0, 1.0, 0.0], None).unwrap(); db.delete(id).unwrap(); @@ -721,7 +721,7 @@ mod tests { } #[test] - fn test_ask_in_collection_only_returns_own_collection() { + fn test_search_in_collection_only_returns_own_collection() { let temp = TempDir::new().unwrap(); let path = temp.path().join("testdb"); let db = CortexaDB::open(path.to_str().unwrap(), 3).unwrap(); @@ -760,7 +760,7 @@ mod tests { // ----- ask_in_collection: sparse collection over-fetch regression ----- #[test] - fn test_ask_in_collection_finds_entry_in_sparse_collection() { + fn test_search_in_collection_finds_entry_in_sparse_collection() { // Regression: before the 4× fix, ask_in_collection returned empty results when the // target collection had far fewer entries than top_k * candidate_multiplier entries globally. let temp = TempDir::new().unwrap(); diff --git a/crates/cortexadb-core/src/storage/wal.rs b/crates/cortexadb-core/src/storage/wal.rs index d679d40..f1fafff 100644 --- a/crates/cortexadb-core/src/storage/wal.rs +++ b/crates/cortexadb-core/src/storage/wal.rs @@ -484,7 +484,7 @@ mod tests { to: crate::core::memory_entry::MemoryId(2), relation: "refers_to".to_string(), }; - let cmd3 = Command::DeleteMemory(crate::core::memory_entry::MemoryId(1)); + let cmd3 = Command::delete(crate::core::memory_entry::MemoryId(1)); wal.append(&cmd1).unwrap(); wal.append(&cmd2).unwrap(); @@ -511,7 +511,7 @@ mod tests { } match &recovered[2].1 { - Command::DeleteMemory(id) => assert_eq!(*id, crate::core::memory_entry::MemoryId(1)), + Command::Delete(id) => assert_eq!(*id, crate::core::memory_entry::MemoryId(1)), _ => panic!("Wrong command type"), } } diff --git a/crates/cortexadb-core/src/store.rs b/crates/cortexadb-core/src/store.rs index ad7d4a6..7ce3e79 100644 --- a/crates/cortexadb-core/src/store.rs +++ b/crates/cortexadb-core/src/store.rs @@ -618,9 +618,9 @@ impl CortexaDBStore { Ok(last_cmd_id) } - pub fn delete_memory(&self, id: MemoryId) -> Result { + pub fn delete(&self, id: 
MemoryId) -> Result { let mut writer = self.writer.lock().expect("writer lock poisoned"); - self.execute_write_transaction_locked(&mut writer, WriteOp::DeleteMemory(id)) + self.execute_write_transaction_locked(&mut writer, WriteOp::Delete(id)) } pub fn add_edge(&self, from: MemoryId, to: MemoryId, relation: String) -> Result { @@ -883,11 +883,11 @@ impl CortexaDBStore { } id } - WriteOp::DeleteMemory(id) => { + WriteOp::Delete(id) => { let cmd_id = if sync_now { - writer.engine.execute_command(Command::DeleteMemory(id))? + writer.engine.execute_command(Command::Delete(id))? } else { - writer.engine.execute_command_unsynced(Command::DeleteMemory(id))? + writer.engine.execute_command_unsynced(Command::Delete(id))? }; let _ = writer.indexes.vector_index_mut().remove(id); cmd_id @@ -1055,7 +1055,7 @@ impl Drop for CortexaDBStore { enum WriteOp { InsertMemory(MemoryEntry), - DeleteMemory(MemoryId), + Delete(MemoryId), AddEdge { from: MemoryId, to: MemoryId, relation: String }, RemoveEdge { from: MemoryId, to: MemoryId }, } @@ -1119,7 +1119,7 @@ mod tests { store.insert_memory(entry).unwrap(); assert_eq!(store.indexed_embeddings(), 1); - store.delete_memory(MemoryId(10)).unwrap(); + store.delete(MemoryId(10)).unwrap(); assert_eq!(store.indexed_embeddings(), 0); } @@ -1410,7 +1410,7 @@ mod tests { // Remove 3 items (they become tombstones in HNSW) for i in 2..5 { - store.delete_memory(MemoryId(i)).unwrap(); + store.delete(MemoryId(i)).unwrap(); } assert_eq!(store.indexed_embeddings(), 2); diff --git a/crates/cortexadb-py/cortexadb/client.py b/crates/cortexadb-py/cortexadb/client.py index 0283523..6e139cf 100644 --- a/crates/cortexadb-py/cortexadb/client.py +++ b/crates/cortexadb-py/cortexadb/client.py @@ -196,7 +196,7 @@ def replay(cls, log_path: str, db_path: str, **kwargs) -> "CortexaDB": report["op_counts"][op_type] = report["op_counts"].get(op_type, 0) + 1 try: - if op_type in ("add", "remember"): + if op_type == "add": new_id = db.add( text=op.get("text"), 
vector=op.get("embedding"), diff --git a/crates/cortexadb-py/cortexadb/providers/gemini.py b/crates/cortexadb-py/cortexadb/providers/gemini.py index 13f4e5a..b4c7cfd 100644 --- a/crates/cortexadb-py/cortexadb/providers/gemini.py +++ b/crates/cortexadb-py/cortexadb/providers/gemini.py @@ -14,7 +14,7 @@ embedder=GeminiEmbedder(api_key="AIza...", model="models/text-embedding-004"), ) db.add("We chose Stripe for payments") - hits = db.ask("payment provider?") + hits = db.search("payment provider?") """ from __future__ import annotations diff --git a/crates/cortexadb-py/cortexadb/providers/ollama.py b/crates/cortexadb-py/cortexadb/providers/ollama.py index 7887693..048e030 100644 --- a/crates/cortexadb-py/cortexadb/providers/ollama.py +++ b/crates/cortexadb-py/cortexadb/providers/ollama.py @@ -15,7 +15,7 @@ embedder=OllamaEmbedder(model="nomic-embed-text"), ) db.add("We chose Stripe for payments") - hits = db.ask("payment provider?") + hits = db.search("payment provider?") """ from __future__ import annotations diff --git a/crates/cortexadb-py/cortexadb/providers/openai.py b/crates/cortexadb-py/cortexadb/providers/openai.py index 9ffd7f8..535f20e 100644 --- a/crates/cortexadb-py/cortexadb/providers/openai.py +++ b/crates/cortexadb-py/cortexadb/providers/openai.py @@ -14,7 +14,7 @@ embedder=OpenAIEmbedder(api_key="sk-...", model="text-embedding-3-small"), ) db.add("We chose Stripe for payments") - hits = db.ask("payment provider?") + hits = db.search("payment provider?") """ from __future__ import annotations