From 9a699774ae48670db3ecfab25544be1867534541 Mon Sep 17 00:00:00 2001 From: anaslimem Date: Sun, 8 Mar 2026 03:03:12 +0100 Subject: [PATCH] Updated docs --- README.md | 9 +- docs/content/docs/api/python.mdx | 82 ++++----- docs/content/docs/api/rust.mdx | 27 ++- .../docs/getting-started/quickstart.mdx | 22 +-- docs/content/docs/guides/collections.mdx | 160 ++++++++++++++++++ docs/content/docs/guides/meta.json | 4 +- docs/content/docs/guides/namespaces.mdx | 160 ------------------ docs/content/docs/guides/replay.mdx | 6 +- docs/content/docs/index.mdx | 10 +- docs/content/docs/resources/benchmarks.mdx | 12 ++ docs/content/docs/resources/examples.mdx | 62 +++---- examples/python/basic_usage.py | 4 +- 12 files changed, 292 insertions(+), 266 deletions(-) create mode 100644 docs/content/docs/guides/collections.mdx delete mode 100644 docs/content/docs/guides/namespaces.mdx diff --git a/README.md b/README.md index fb40eae..f6674de 100644 --- a/README.md +++ b/README.md @@ -72,10 +72,11 @@ pip install cortexadb[docs,pdf] # Optional: For PDF/Docx support
Technical Architecture & Benchmarks -### Performance Benchmarks (v0.1.7) -Measured on M2 Mac with 1,000 chunks of text. +### Performance Benchmarks (v0.1.8) -| Operation | v0.1.6 (Sync) | v0.1.7 (Batch) | Improvement | +CortexaDB `v0.1.8` introduced a new batching architecture. Measured on an M2 Mac with 1,000 chunks of text: + +| Operation | v0.1.6 (Sync) | v0.1.8 (Batch) | Improvement | |-----------|---------------|----------------|-------------| | Ingestion | 12.4s | **0.12s** | **103x Faster** | | Memory Add| 15ms | 1ms | 15x Faster | @@ -86,7 +87,7 @@ Measured on M2 Mac with 1,000 chunks of text. --- ## License & Status -CortexaDB is currently in **Beta (v0.1.7)**. It is released under the **MIT** and **Apache-2.0** licenses. +CortexaDB is currently in **Beta (v0.1.8)**. It is released under the **MIT** and **Apache-2.0** licenses. We are actively refining the API and welcome feedback! --- diff --git a/docs/content/docs/api/python.mdx b/docs/content/docs/api/python.mdx index bfaa3ba..b447cb6 100644 --- a/docs/content/docs/api/python.mdx +++ b/docs/content/docs/api/python.mdx @@ -72,7 +72,7 @@ report = db.last_replay_report ## Memory Operations -### `.remember(text, embedding=None, metadata=None)` +### `.add(text=None, vector=None, metadata=None, collection=None)` Stores a new memory entry. If an embedder is configured and no embedding is provided, the text is auto-embedded. @@ -80,41 +80,39 @@ Stores a new memory entry. 
If an embedder is configured and no embedding is prov | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `text` | `str` | Required | Text content to store | -| `embedding` | `list[float]?` | `None` | Pre-computed embedding vector | +| `text` | `str?` | `None` | Text content to store | +| `vector` | `list[float]?` | `None` | Pre-computed embedding vector | | `metadata` | `dict[str, str]?` | `None` | Key-value metadata pairs | +| `collection` | `str?` | `"default"` | Target collection | **Returns:** `int` - The assigned memory ID **Example:** ```python -mid = db.remember("User prefers dark mode") -mid = db.remember("text", metadata={"source": "onboarding"}) -mid = db.remember("text", embedding=[0.1, 0.2, ...]) +mid = db.add("User prefers dark mode") +mid = db.add("text", metadata={"source": "onboarding"}) +mid = db.add("text", vector=[0.1, 0.2, ...], collection="agent_a") ``` --- -### `.ask(query, embedding=None, top_k=5, use_graph=False, recency_bias=False)` +### `.query(text=None, vector=None)` -Performs a hybrid search across the database. +Starts a fluent query builder to search across the database. 
-**Parameters:** - -| Parameter | Type | Default | Description | -|-----------|------|---------|-------------| -| `query` | `str` | Required | Search query text | -| `embedding` | `list[float]?` | `None` | Pre-computed query embedding | -| `top_k` | `int` | `5` | Number of results to return | -| `use_graph` | `bool` | `False` | Enable graph expansion via BFS | -| `recency_bias` | `bool` | `False` | Boost recent memories in scoring | +**Methods:** -**Returns:** `list[Hit]` +| Method | Description | +|--------|-------------| +| `.limit(n)` | Set maximum number of results (default 5) | +| `.collection(name)` | Filter to a specific collection | +| `.use_graph()` | Enable hybrid graph traversal | +| `.recency_bias()` | Boost recent memories in scoring | +| `.execute()` | Run the query and return `list[Hit]` | **Example:** ```python -hits = db.ask("What does the user prefer?") -hits = db.ask("query", top_k=10, use_graph=True, recency_bias=True) +hits = db.query("What does the user prefer?").limit(5).use_graph().execute() for hit in hits: print(f"ID: {hit.id}, Score: {hit.score:.3f}") @@ -122,7 +120,7 @@ for hit in hits: --- -### `.get_memory(mid)` +### `.get(mid)` Retrieves a full memory entry by ID. @@ -138,7 +136,7 @@ Retrieves a full memory entry by ID. **Example:** ```python -mem = db.get_memory(42) +mem = db.get(42) print(mem.id) # 42 print(mem.content) # b"User prefers dark mode" print(mem.namespace) # "default" @@ -150,7 +148,7 @@ print(mem.embedding) # [0.1, 0.2, ...] or None --- -### `.delete_memory(mid)` +### `.delete(mid)` Permanently deletes a memory and updates all indexes. @@ -164,7 +162,7 @@ Permanently deletes a memory and updates all indexes. **Example:** ```python -db.delete_memory(42) +db.delete(42) ``` --- @@ -189,7 +187,7 @@ db.connect(1, 2, "relates_to") db.connect(1, 3, "caused_by") ``` -> Both memories must be in the same namespace. Cross-namespace edges are forbidden. +> Both memories must be in the same collection. 
Cross-collection edges are forbidden. --- @@ -229,7 +227,7 @@ Chunks text and stores each chunk as a memory. | `chunk_size` | `int` | `512` | Target chunk size in characters | | `overlap` | `int` | `50` | Overlap between chunks | | `metadata` | `dict?` | `None` | Metadata to attach to all chunks | -| `namespace` | `str?` | `None` | Target namespace | +| `collection` | `str?` | `None` | Target collection | **Returns:** `list[int]` - Memory IDs of stored chunks @@ -248,7 +246,7 @@ Loads a file, chunks it, and stores each chunk. | `chunk_size` | `int` | `512` | Target chunk size | | `overlap` | `int` | `50` | Overlap between chunks | | `metadata` | `dict?` | `None` | Metadata for all chunks | -| `namespace` | `str?` | `None` | Target namespace | +| `collection` | `str?` | `None` | Target collection | **Supported formats:** `.txt`, `.md`, `.json`, `.docx` (requires `cortexadb[docs]`), `.pdf` (requires `cortexadb[pdf]`) @@ -260,34 +258,28 @@ db.load("paper.pdf", strategy="recursive", chunk_size=1024) --- -### `.ingest_document(text, chunk_size=512, overlap=50, metadata=None, namespace=None)` - -Legacy method for chunking and storing text. Uses fixed chunking. - ---- - -## Namespace +## Collections -### `.namespace(name, readonly=False)` +### `.collection(name, readonly=False)` -Returns a scoped view of the database for a specific namespace. +Returns a scoped view of the database for a specific collection. 
**Parameters:** | Parameter | Type | Default | Description | |-----------|------|---------|-------------| -| `name` | `str` | Required | Namespace name | +| `name` | `str` | Required | Collection name | | `readonly` | `bool` | `False` | If `True`, write operations raise errors | -**Returns:** `Namespace` +**Returns:** `Collection` **Example:** ```python -ns = db.namespace("agent_a") -mid = ns.remember("text") -hits = ns.ask("query") -ns.delete_memory(mid) -ns.ingest_document("long text") +col = db.collection("agent_a") +mid = col.add("text") +hits = col.query("query").execute() +col.delete(mid) +col.ingest("long text") ``` --- @@ -382,12 +374,12 @@ Query result from `.ask()`. ### `Memory` -Full memory entry from `.get_memory()`. +Full memory entry from `.get()`. | Field | Type | Description | |-------|------|-------------| | `id` | `int` | Memory ID | -| `namespace` | `str` | Namespace name | +| `namespace` | `str` | Collection name (internal key) | | `content` | `bytes` | Raw content | | `embedding` | `list[float]?` | Vector embedding | | `metadata` | `dict[str, str]` | Key-value metadata | diff --git a/docs/content/docs/api/rust.mdx b/docs/content/docs/api/rust.mdx index ac70273..64fb63b 100644 --- a/docs/content/docs/api/rust.mdx +++ b/docs/content/docs/api/rust.mdx @@ -12,13 +12,15 @@ The high-level API for interacting with the database. 
### Opening a Database ```rust -use cortexadb_core::CortexaDB; +use cortexadb_core::{CortexaDB, CortexaDBBuilder}; // Simple open with default config -let db = CortexaDB::open("/path/to/db", 128)?; +let db = CortexaDBBuilder::new("/path/to/db", 128).build()?; // Builder pattern for advanced config -let db = CortexaDB::builder("/path/to/db", config).build()?; +let db = CortexaDBBuilder::new("/path/to/db", 128) + .with_sync_policy(cortexadb_core::engine::SyncPolicy::Async { interval_ms: 1000 }) + .build()?; ``` --- @@ -142,6 +144,25 @@ println!("Indexed: {}", stats.indexed_embeddings); --- +## Observability / Telemetry + +CortexaDB uses the standard Rust [`log`](https://crates.io/crates/log) crate for all internal diagnostics and telemetry. It issues structured `debug!` and `trace!` logs instead of printing to stdout/stderr. + +To see CortexaDB metrics and internal operations in your application, initialize a logger (like `env_logger` or `tracing-subscriber`): + +```rust +use env_logger; + +fn main() { + // Initialize the logger before opening the database + env_logger::init(); + + // In your terminal, run with: RUST_LOG=cortexadb_core=debug cargo run +} +``` + +--- + ## Types ### `Hit` diff --git a/docs/content/docs/getting-started/quickstart.mdx b/docs/content/docs/getting-started/quickstart.mdx index a73f188..0bee787 100644 --- a/docs/content/docs/getting-started/quickstart.mdx +++ b/docs/content/docs/getting-started/quickstart.mdx @@ -30,23 +30,23 @@ db = CortexaDB.open("agent.mem", dimension=128) ```python # Auto-embedding (requires embedder) -mid1 = db.remember("The user prefers dark mode.") -mid2 = db.remember("User works at Stripe.") +mid1 = db.add("The user prefers dark mode.") +mid2 = db.add("User works at Stripe.") # With metadata -mid3 = db.remember("User's name is Alice.", metadata={"source": "onboarding"}) +mid3 = db.add("User's name is Alice.", metadata={"source": "onboarding"}) ``` ### 4. 
Query Memories ```python # Semantic search -hits = db.ask("What does the user like?") +hits = db.query("What does the user like?").execute() for hit in hits: print(f"ID: {hit.id}, Score: {hit.score:.3f}") # Retrieve full memory -mem = db.get_memory(hits[0].id) +mem = db.get(hits[0].id) print(mem.content) # b"The user prefers dark mode." ``` @@ -68,12 +68,12 @@ db.load("document.pdf", strategy="recursive") db.ingest("Long article text here...", strategy="markdown") ``` -### 7. Use Namespaces +### 7. Use Collections ```python -agent_a = db.namespace("agent_a") -agent_a.remember("Agent A's private memory") -hits = agent_a.ask("query only agent A's memories") +agent_a = db.collection("agent_a") +agent_a.add("Agent A's private memory") +hits = agent_a.query("query only agent A's memories").execute() ``` --- @@ -90,10 +90,10 @@ cortexadb-core = { git = "https://github.com/anaslimem/CortexaDB.git" } ### 2. Basic Usage ```rust -use cortexadb_core::CortexaDB; +use cortexadb_core::{CortexaDB, CortexaDBBuilder}; fn main() -> Result<(), Box<dyn std::error::Error>> { - let db = CortexaDB::open("/tmp/agent.mem", 128)?; + let db = CortexaDBBuilder::new("/tmp/agent.mem", 128).build()?; // Store a memory with an embedding let embedding = vec![0.1; 128]; diff --git a/docs/content/docs/guides/collections.mdx b/docs/content/docs/guides/collections.mdx new file mode 100644 index 0000000..dc6b0f2 --- /dev/null +++ b/docs/content/docs/guides/collections.mdx @@ -0,0 +1,160 @@ +--- +title: Collections +description: Multi-agent memory isolation +--- + +Collections allow you to isolate memories between different agents, workspaces, or contexts within a single CortexaDB database file. + +## Overview + +Every memory in CortexaDB belongs to a collection. The default collection is `"default"`. 
Collections provide: + +- **Isolation** - Queries only return results from the target collection +- **Organization** - Group memories by agent, user, or topic +- **Access Control** - Readonly collections for shared knowledge + +--- + +## Basic Usage + +### Creating a Collection + +```python +# Get a collection handle +agent_a = db.collection("agent_a") +agent_b = db.collection("agent_b") +``` + +### Writing to a Collection + +```python +agent_a.add("Agent A's private memory") +agent_b.add("Agent B's private memory") +``` + +### Querying a Collection + +```python +# Only searches within agent_a's memories +hits = agent_a.query("What do I know?").execute() + +# Only searches within agent_b's memories +hits = agent_b.query("What do I know?").execute() +``` + +### Deleting from a Collection + +```python +agent_a.delete(memory_id) +``` + +### Ingesting Documents + +```python +agent_a.ingest("Long document...", chunk_size=512) +``` + +--- + +## Default Collection + +When you use the top-level `db.add()` and `db.query()`, memories are stored in and queried from the `"default"` collection. + +```python +# These are equivalent: +db.add("text") +db.collection("default").add("text") +``` + +--- + +## Readonly Collections + +You can create readonly collection handles for shared knowledge that shouldn't be modified: + +```python +shared = db.collection("shared_knowledge", readonly=True) + +# Reading works fine +hits = shared.query("query").execute() + +# Writing raises CortexaDBError +shared.add("text") # Error! +``` + +This is useful for multi-agent systems where some agents should only read from a shared knowledge base. + +--- + +## Graph Edge Rules + +Graph edges are collection-scoped. 
You **cannot** create edges between memories in different collections: + +```python +agent_a = db.collection("agent_a") +agent_b = db.collection("agent_b") + +mid1 = agent_a.add("Memory in A") +mid2 = agent_b.add("Memory in B") + +# This will raise an error - cross-collection edges are forbidden +db.connect(mid1, mid2, "relates_to") +``` + +Graph traversal during queries also respects collection boundaries — BFS will not cross into other collections. + +--- + +## Common Patterns + +### Multi-Agent System + +```python +db = CortexaDB.open("agents.mem", embedder=embedder) + +# Each agent has its own collection +planner = db.collection("planner") +researcher = db.collection("researcher") +writer = db.collection("writer") + +# Agents store memories independently +planner.add("Task: Write a blog post about AI") +researcher.add("Found 3 relevant papers on AI agents") +writer.add("Draft: AI agents are transforming...") + +# Each agent queries only its own memories +planner_context = planner.query("What tasks are pending?").execute() +``` + +### Shared Knowledge Base + +```python +# Admin writes to shared collection +shared = db.collection("shared") +shared.add("Company policy: All code must be reviewed") + +# Agents read from shared collection (readonly) +agent = db.collection("shared", readonly=True) +hits = agent.query("What are the code review rules?").execute() +``` + +### Per-User Memory + +```python +def get_user_memory(db, user_id): + return db.collection(f"user_{user_id}") + +alice = get_user_memory(db, "alice") +alice.add("Alice prefers dark mode") + +bob = get_user_memory(db, "bob") +bob.add("Bob prefers light mode") +``` + +--- + +## Next Steps + +- [Core Concepts](/docs/guides/core-concepts) - How collections fit into the architecture +- [Query Engine](/docs/guides/query-engine) - How collection scoping affects queries +- [Python API](/docs/api/python) - Collection API reference diff --git a/docs/content/docs/guides/meta.json 
b/docs/content/docs/guides/meta.json index f2dedf9..1c184fd 100644 --- a/docs/content/docs/guides/meta.json +++ b/docs/content/docs/guides/meta.json @@ -6,9 +6,9 @@ "query-engine", "indexing", "chunking", - "namespaces", + "collections", "embedders", "replay", "configuration" ] -} +} \ No newline at end of file diff --git a/docs/content/docs/guides/namespaces.mdx b/docs/content/docs/guides/namespaces.mdx deleted file mode 100644 index 78a3706..0000000 --- a/docs/content/docs/guides/namespaces.mdx +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: Namespaces -description: Multi-agent memory isolation ---- - -Namespaces allow you to isolate memories between different agents, workspaces, or contexts within a single CortexaDB database file. - -## Overview - -Every memory in CortexaDB belongs to a namespace. The default namespace is `"default"`. Namespaces provide: - -- **Isolation** - Queries only return results from the target namespace -- **Organization** - Group memories by agent, user, or topic -- **Access Control** - Readonly namespaces for shared knowledge - ---- - -## Basic Usage - -### Creating a Namespace - -```python -# Get a namespace handle -agent_a = db.namespace("agent_a") -agent_b = db.namespace("agent_b") -``` - -### Writing to a Namespace - -```python -agent_a.remember("Agent A's private memory") -agent_b.remember("Agent B's private memory") -``` - -### Querying a Namespace - -```python -# Only searches within agent_a's memories -hits = agent_a.ask("What do I know?") - -# Only searches within agent_b's memories -hits = agent_b.ask("What do I know?") -``` - -### Deleting from a Namespace - -```python -agent_a.delete_memory(memory_id) -``` - -### Ingesting Documents - -```python -agent_a.ingest_document("Long document...", chunk_size=512) -``` - ---- - -## Default Namespace - -When you use the top-level `db.remember()` and `db.ask()`, memories are stored in and queried from the `"default"` namespace. 
- -```python -# These are equivalent: -db.remember("text") -db.namespace("default").remember("text") -``` - ---- - -## Readonly Namespaces - -You can create readonly namespace handles for shared knowledge that shouldn't be modified: - -```python -shared = db.namespace("shared_knowledge", readonly=True) - -# Reading works fine -hits = shared.ask("query") - -# Writing raises CortexaDBError -shared.remember("text") # Error! -``` - -This is useful for multi-agent systems where some agents should only read from a shared knowledge base. - ---- - -## Graph Edge Rules - -Graph edges are namespace-scoped. You **cannot** create edges between memories in different namespaces: - -```python -agent_a = db.namespace("agent_a") -agent_b = db.namespace("agent_b") - -mid1 = agent_a.remember("Memory in A") -mid2 = agent_b.remember("Memory in B") - -# This will raise an error - cross-namespace edges are forbidden -db.connect(mid1, mid2, "relates_to") -``` - -Graph traversal during queries also respects namespace boundaries — BFS will not cross into other namespaces. 
- ---- - -## Common Patterns - -### Multi-Agent System - -```python -db = CortexaDB.open("agents.mem", embedder=embedder) - -# Each agent has its own namespace -planner = db.namespace("planner") -researcher = db.namespace("researcher") -writer = db.namespace("writer") - -# Agents store memories independently -planner.remember("Task: Write a blog post about AI") -researcher.remember("Found 3 relevant papers on AI agents") -writer.remember("Draft: AI agents are transforming...") - -# Each agent queries only its own memories -planner_context = planner.ask("What tasks are pending?") -``` - -### Shared Knowledge Base - -```python -# Admin writes to shared namespace -shared = db.namespace("shared") -shared.remember("Company policy: All code must be reviewed") - -# Agents read from shared namespace (readonly) -agent = db.namespace("shared", readonly=True) -hits = agent.ask("What are the code review rules?") -``` - -### Per-User Memory - -```python -def get_user_memory(db, user_id): - return db.namespace(f"user_{user_id}") - -alice = get_user_memory(db, "alice") -alice.remember("Alice prefers dark mode") - -bob = get_user_memory(db, "bob") -bob.remember("Bob prefers light mode") -``` - ---- - -## Next Steps - -- [Core Concepts](/docs/guides/core-concepts) - How namespaces fit into the architecture -- [Query Engine](/docs/guides/query-engine) - How namespace scoping affects queries -- [Python API](/docs/api/python) - Namespace API reference diff --git a/docs/content/docs/guides/replay.mdx b/docs/content/docs/guides/replay.mdx index 43fb774..638a55c 100644 --- a/docs/content/docs/guides/replay.mdx +++ b/docs/content/docs/guides/replay.mdx @@ -19,10 +19,10 @@ Enable recording by passing a `record` path when opening the database: db = CortexaDB.open("agent.mem", dimension=128, record="session.log") # All operations are now logged -mid1 = db.remember("User likes dark mode", embedding=[...]) -mid2 = db.remember("User works at Stripe", embedding=[...]) +mid1 = db.add("User likes 
dark mode", vector=[...]) +mid2 = db.add("User works at Stripe", vector=[...]) db.connect(mid1, mid2, "relates_to") -db.delete_memory(mid1) +db.delete(mid1) db.compact() db.checkpoint() ``` diff --git a/docs/content/docs/index.mdx b/docs/content/docs/index.mdx index 30c2a68..6fd9b32 100644 --- a/docs/content/docs/index.mdx +++ b/docs/content/docs/index.mdx @@ -23,7 +23,7 @@ Think of it as **SQLite, but with semantic and relational intelligence for your - [Query Engine](/docs/guides/query-engine) - Hybrid search with vector, graph, and temporal scoring - [Indexing](/docs/guides/indexing) - Exact search vs HNSW approximate nearest neighbor - [Chunking](/docs/guides/chunking) - Document ingestion and chunking strategies -- [Namespaces](/docs/guides/namespaces) - Multi-agent memory isolation +- [Collections](/docs/guides/collections) - Multi-agent memory isolation - [Embedders](/docs/guides/embedders) - Embedding providers (OpenAI, Gemini, Ollama, Hash) - [Replay & Recording](/docs/guides/replay) - Deterministic session recording and replay - [Configuration](/docs/guides/configuration) - All configuration options explained @@ -47,7 +47,7 @@ Think of it as **SQLite, but with semantic and relational intelligence for your - **File Support** - Load TXT, MD, JSON, DOCX, and PDF documents directly - **HNSW Indexing** - Ultra-fast approximate nearest neighbor search via USearch - **Hard Durability** - Write-Ahead Log and segmented storage ensure crash safety -- **Multi-Agent Namespaces** - Isolate memories between agents within a single database file +- **Multi-Agent Collections** - Isolate memories between agents within a single database file - **Deterministic Replay** - Record and replay operations for debugging or migration - **Automatic Capacity Management** - LRU/importance-based eviction with `max_entries` or `max_bytes` @@ -61,10 +61,10 @@ from cortexadb.providers.openai import OpenAIEmbedder db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder()) -db.remember("The 
user prefers dark mode.") -db.remember("User works at Stripe.") +db.add("The user prefers dark mode.") +db.add("User works at Stripe.") -hits = db.ask("What does the user like?") +hits = db.query("What does the user like?").execute() for hit in hits: print(f"ID: {hit.id}, Score: {hit.score}") ``` diff --git a/docs/content/docs/resources/benchmarks.mdx b/docs/content/docs/resources/benchmarks.mdx index 7fabc73..fb3d248 100644 --- a/docs/content/docs/resources/benchmarks.mdx +++ b/docs/content/docs/resources/benchmarks.mdx @@ -16,6 +16,18 @@ CortexaDB has been benchmarked with **10,000 embeddings** at **384 dimensions** --- +## Batch Ingestion Performance (v0.1.8) + +CortexaDB `v0.1.8` introduced a new batching architecture. Measured on an M2 Mac with 1,000 chunks of text: + +| Operation | v0.1.6 (Sync) | v0.1.8 (Batch) | Improvement | +|-----------|---------------|----------------|-------------| +| Ingestion | 12.4s | **0.12s** | **103x Faster** | +| Memory Add| 15ms | 1ms | 15x Faster | +| HNSW Search| 0.3ms | 0.28ms | - | + +--- + ## Methodology - **Dataset**: 10,000 embeddings x 384 dimensions (realistic sentence-transformer size) diff --git a/docs/content/docs/resources/examples.mdx b/docs/content/docs/resources/examples.mdx index 2e310d4..c59f6b2 100644 --- a/docs/content/docs/resources/examples.mdx +++ b/docs/content/docs/resources/examples.mdx @@ -14,14 +14,14 @@ from cortexadb.providers.openai import OpenAIEmbedder db = CortexaDB.open("agent.mem", embedder=OpenAIEmbedder()) # Store memories -mid1 = db.remember("The user prefers dark mode.") -mid2 = db.remember("User works at Stripe.") -mid3 = db.remember("User's favorite language is Python.") +mid1 = db.add("The user prefers dark mode.") +mid2 = db.add("User works at Stripe.") +mid3 = db.add("User's favorite language is Python.") # Search -hits = db.ask("What programming language does the user like?") +hits = db.query("What programming language does the user like?").execute() for hit in hits: - mem = 
db.get_memory(hit.id) + mem = db.get(hit.id) print(f"[{hit.score:.3f}] {mem.content.decode()}") ``` @@ -33,9 +33,9 @@ for hit in hits: db = CortexaDB.open("knowledge.mem", embedder=embedder) # Store entities -alice = db.remember("Alice is a software engineer at Acme Corp") -bob = db.remember("Bob is Alice's manager") -acme = db.remember("Acme Corp builds developer tools") +alice = db.add("Alice is a software engineer at Acme Corp") +bob = db.add("Bob is Alice's manager") +acme = db.add("Acme Corp builds developer tools") # Create relationships db.connect(alice, bob, "reports_to") @@ -43,7 +43,7 @@ db.connect(alice, acme, "works_at") db.connect(bob, acme, "works_at") # Query with graph expansion -hits = db.ask("Who works at Acme?", use_graph=True) +hits = db.query("Who works at Acme?").use_graph().execute() ``` --- @@ -54,25 +54,25 @@ hits = db.ask("Who works at Acme?", use_graph=True) db = CortexaDB.open("agents.mem", embedder=embedder) # Each agent has isolated memory -planner = db.namespace("planner") -researcher = db.namespace("researcher") -writer = db.namespace("writer") +planner = db.collection("planner") +researcher = db.collection("researcher") +writer = db.collection("writer") # Agents store memories independently -planner.remember("Task: Write a blog post about vector databases") -researcher.remember("Found: CortexaDB supports HNSW indexing") -researcher.remember("Found: Typical recall is 95% with HNSW") -writer.remember("Draft intro: Vector databases are transforming AI...") +planner.add("Task: Write a blog post about vector databases") +researcher.add("Found: CortexaDB supports HNSW indexing") +researcher.add("Found: Typical recall is 95% with HNSW") +writer.add("Draft intro: Vector databases are transforming AI...") # Each agent queries only its own memories -research = researcher.ask("What did I find about indexing?") +research = researcher.query("What did I find about indexing?").execute() # Shared knowledge base (readonly for agents) -shared = 
db.namespace("shared") -shared.remember("Company style guide: Use active voice") +shared = db.collection("shared") +shared.add("Company style guide: Use active voice") -agent_view = db.namespace("shared", readonly=True) -guidelines = agent_view.ask("What is the writing style?") +agent_view = db.collection("shared", readonly=True) +guidelines = agent_view.query("What is the writing style?").execute() ``` --- @@ -94,9 +94,9 @@ Long article about machine learning... ids = db.ingest(article, strategy="semantic", chunk_size=2048) # Query across all ingested documents -hits = db.ask("How do I configure the API?", top_k=10) +hits = db.query("How do I configure the API?").limit(10).execute() for hit in hits: - mem = db.get_memory(hit.id) + mem = db.get(hit.id) print(f"[{hit.score:.3f}] {mem.content.decode()[:100]}...") ``` @@ -107,8 +107,8 @@ for hit in hits: ```python # Record a session db = CortexaDB.open("agent.mem", embedder=embedder, record="session.log") -db.remember("User asked about pricing") -db.remember("Showed enterprise plan") +db.add("User asked about pricing") +db.add("Showed enterprise plan") db.connect(1, 2, "led_to") # Later: replay the session for debugging @@ -164,7 +164,7 @@ db = CortexaDB.open( # Old, low-importance memories are automatically evicted for i in range(20000): - db.remember(f"Memory #{i}") + db.add(f"Memory #{i}") stats = db.stats() print(f"Entries: {stats.entries}") # ~10000 (eviction kicked in) @@ -178,14 +178,14 @@ print(f"Entries: {stats.entries}") # ~10000 (eviction kicked in) db = CortexaDB.open("agent.mem", embedder=embedder) # Store with metadata -db.remember("Dark mode enabled", metadata={"category": "preference"}) -db.remember("Meeting at 3pm", metadata={"category": "schedule"}) -db.remember("Likes Python", metadata={"category": "preference"}) +db.add("Dark mode enabled", metadata={"category": "preference"}) +db.add("Meeting at 3pm", metadata={"category": "schedule"}) +db.add("Likes Python", metadata={"category": "preference"}) # 
Filter by metadata (if supported by your query) -hits = db.ask("What are the user's preferences?") +hits = db.query("What are the user's preferences?").execute() for hit in hits: - mem = db.get_memory(hit.id) + mem = db.get(hit.id) print(f"{mem.content.decode()} [{mem.metadata}]") ``` diff --git a/examples/python/basic_usage.py b/examples/python/basic_usage.py index d31d659..053d9cb 100644 --- a/examples/python/basic_usage.py +++ b/examples/python/basic_usage.py @@ -22,7 +22,7 @@ def main(): if os.path.isdir(db_path): shutil.rmtree(db_path) - print("=== CortexaDB Python Example (v0.1.7) ===\n") + print("=== CortexaDB Python Example (v0.1.8) ===\n") # 1. Open database with embedder (auto-embeds text) # HashEmbedder generates deterministic embeddings for testing @@ -56,7 +56,7 @@ def main(): Third paragraph to complete the example. """ - # v0.1.7 uses optimized batch insertion internally + # v0.1.8 uses optimized batch insertion internally ids = db.ingest(long_text, strategy="recursive", chunk_size=100, overlap=10) print(f" Recursive batching: {len(ids)} chunks stored in ms")