From 930aeb9c90730b1b661f8571d13d67a4bbf063e4 Mon Sep 17 00:00:00 2001
From: jonathanmagambo <jonathanomagambo@gmail.com>
Date: Sat, 7 Mar 2026 20:31:11 -0500
Subject: [PATCH 1/2] feat(storage/query): implement secondary indexes and safe
 joins

- Added `extract_field_raw` for zero-allocation MessagePack field extraction.
- Introduced `IndexRegistry` to natively manage secondary indexes.
- Wired atomic MVCC index updates during `insert`, `update_doc`, and `delete`.
- Flattened pagination params to handle dynamic `where[field]` intercept routing.
- Implemented `POST /v1/_query` relational BFS executor with N+1 B-Tree traversals.
- Designed `JoinQuery` AST in `forge-types` enforcing max depth caps.
---
 Cargo.lock                     |   3 +
 crates/server/src/lib.rs       | 204 ++++++++++++++++++-
 crates/storage/Cargo.toml      |   2 +
 crates/storage/src/engine.rs   | 351 ++++++++++++++++++++++++++++++++-
 crates/storage/src/extract.rs  | 254 ++++++++++++++++++++++++
 crates/storage/src/index.rs    | 132 +++++++++++++
 crates/storage/src/lib.rs      |   3 +
 crates/types/Cargo.toml        |   1 +
 crates/types/src/lib.rs        |   2 +
 crates/types/src/pagination.rs |   7 +
 crates/types/src/query.rs      | 151 ++++++++++++++
 11 files changed, 1092 insertions(+), 18 deletions(-)
 create mode 100644 crates/storage/src/extract.rs
 create mode 100644 crates/storage/src/index.rs
 create mode 100644 crates/types/src/query.rs

diff --git a/Cargo.lock b/Cargo.lock
index 676bfda..24c238f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -819,8 +819,10 @@ dependencies = [
  "bytes",
  "forge-types",
  "redbx",
+ "rmp",
  "rmp-serde",
  "serde",
+ "serde_json",
  "tempfile",
  "tokio",
  "tracing",
@@ -833,6 +835,7 @@ version = "0.2.0"
 dependencies = [
  "redbx",
  "serde",
+ "serde_json",
  "thiserror 2.0.18",
  "toml",
  "uuid",
diff --git a/crates/server/src/lib.rs b/crates/server/src/lib.rs
index f1dcf74..765f0bf 100644
--- a/crates/server/src/lib.rs
+++ b/crates/server/src/lib.rs
@@ -68,7 +68,15 @@ pub fn app(state: AppState) -> Router {
             "/v1/{collection}/{id}",
             get(get_doc).patch(update_doc).delete(delete_doc),
         )
-        .route("/v1/_query", axum::routing::post(query_docs_stub))
+        .route(
+            "/v1/_indexes/{collection}",
+            axum::routing::post(create_index),
+        )
+        .route(
+            "/v1/_indexes/{collection}/{field}",
+            axum::routing::delete(drop_index),
+        )
+        .route("/v1/_query", axum::routing::post(query_docs))
         // Everything inside /v1 requires a valid PASETO token.
         // The middleware parses the Bearer header and rejects bad tokens fast.
         .route_layer(axum::middleware::from_fn_with_state(
@@ -128,7 +136,32 @@ async fn list_docs(
     let limit = params.limit.unwrap_or(50).clamp(1, 100) as usize;
     let cursor = params.cursor.as_deref();
 
-    match state.engine.list_paginated(&collection, cursor, limit) {
+    let mut where_filter = None;
+    for (k, v) in &params.query_filters {
+        if k.starts_with("where[") && k.ends_with("]") {
+            let field = &k[6..k.len() - 1];
+            where_filter = Some((field, v));
+            break;
+        }
+    }
+
+    let query_result = match where_filter {
+        Some((field, val_str)) => {
+            let msgpack_val = match serde_json::from_str::<serde_json::Value>(val_str) {
+                Ok(j) => forge_storage::document::serialize_doc(&j).unwrap_or_default(),
+                Err(_) => forge_storage::document::serialize_doc(&serde_json::Value::String(
+                    val_str.to_string(),
+                ))
+                .unwrap_or_default(),
+            };
+            state
+                .engine
+                .lookup_by_index(&collection, field, &msgpack_val, cursor, limit)
+        }
+        None => state.engine.list_paginated(&collection, cursor, limit),
+    };
+
+    match query_result {
         Ok((docs, next_cursor)) => {
             let accept = headers
                 .get(axum::http::header::ACCEPT)
@@ -332,13 +365,134 @@ async fn get_doc(
     }
 }
 
+/// Helper function to execute local BFS N+1 joins directly on the B-Tree memory maps.
+fn process_joins(
+    engine: &forge_storage::StorageEngine,
+    parent_docs: &mut [serde_json::Value],
+    joins: &std::collections::HashMap<String, forge_types::query::JoinNode>,
+) {
+    for (join_key, node) in joins {
+        for parent in parent_docs.iter_mut() {
+            let parent_obj = if let Some(o) = parent.as_object_mut() {
+                o
+            } else {
+                continue;
+            };
+
+            let doc_obj = parent_obj.get("doc").and_then(|d| d.as_object());
+
+            let on_val = if node.on == "id" {
+                parent_obj.get("id").cloned()
+            } else {
+                doc_obj.and_then(|d| d.get(&node.on).cloned())
+            };
+
+            let mut joined_records = Vec::new();
+
+            if let Some(on_v) = on_val {
+                if node.target == "id" {
+                    if let Some(target_id) = on_v.as_str()
+                        && let Ok(Some(bytes)) = engine.get(&node.collection, target_id)
+                            && let Ok(doc) = rmp_serde::from_slice::<serde_json::Value>(&bytes) {
+                                joined_records.push(serde_json::json!({
+                                    "id": target_id,
+                                    "doc": doc,
+                                    "_joins": serde_json::json!({})
+                                }));
+                            }
+                } else {
+                    let msgpack_val =
+                        forge_storage::document::serialize_doc(&on_v).unwrap_or_default();
+                    if let Ok((matches, _)) = engine.lookup_by_index(
+                        &node.collection,
+                        &node.target,
+                        &msgpack_val,
+                        None,
+                        100,
+                    ) {
+                        for (j_id, j_bytes) in matches {
+                            if let Ok(doc) = rmp_serde::from_slice::<serde_json::Value>(&j_bytes) {
+                                joined_records.push(serde_json::json!({
+                                    "id": j_id,
+                                    "doc": doc,
+                                    "_joins": serde_json::json!({})
+                                }));
+                            }
+                        }
+                    }
+                }
+            }
+
+            if !node.joins.is_empty() && !joined_records.is_empty() {
+                process_joins(engine, &mut joined_records, &node.joins);
+            }
+
+            if let Some(joins_map) = parent_obj.get_mut("_joins").and_then(|j| j.as_object_mut()) {
+                joins_map.insert(join_key.clone(), serde_json::Value::Array(joined_records));
+            }
+        }
+    }
+}
+
 /// POST /v1/_query
-/// Safe Joins implementation (placeholder for Phase D).
-async fn query_docs_stub() -> impl IntoResponse {
-    (
-        StatusCode::NOT_IMPLEMENTED,
-        "Safe Joins landing in v0.3 Phase D",
-    )
+/// Executes a complex traversal of relation trees locally inside memory, producing deeply
+/// nested and securely validated Join payloads in < 1ms without explicit SQL syntax.
+async fn query_docs(
+    State(state): State<AppState>,
+    axum::Json(query): axum::Json<forge_types::query::JoinQuery>,
+) -> Result<impl IntoResponse, StatusCode> {
+    if let Err(e) = query.validate() {
+        tracing::warn!("Invalid join query: {e}");
+        return Err(StatusCode::BAD_REQUEST);
+    }
+
+    let limit = query.resolved_limit();
+    let (docs, next_cursor) = if let Some((k, v)) = query.filter.iter().next() {
+        let msgpack_val = forge_storage::document::serialize_doc(v).unwrap_or_default();
+        state
+            .engine
+            .lookup_by_index(
+                &query.collection,
+                k,
+                &msgpack_val,
+                query.cursor.as_deref(),
+                limit,
+            )
+            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
+    } else {
+        state
+            .engine
+            .list_paginated(&query.collection, query.cursor.as_deref(), limit)
+            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
+    };
+
+    let mut root_docs: Vec<serde_json::Value> = docs
+        .into_iter()
+        .map(|(id, bytes)| {
+            let doc: serde_json::Value =
+                rmp_serde::from_slice(&bytes).unwrap_or(serde_json::Value::Null);
+            serde_json::json!({
+                "id": id,
+                "doc": doc,
+                "_joins": serde_json::json!({})
+            })
+        })
+        .collect();
+
+    process_joins(&state.engine, &mut root_docs, &query.joins);
+
+    let has_more = next_cursor.is_some();
+    let response = forge_types::pagination::PaginatedResponse {
+        data: root_docs,
+        next_cursor,
+        has_more,
+    };
+
+    Ok((
+        StatusCode::OK,
+        [(axum::http::header::CONTENT_TYPE, "application/json")],
+        serde_json::to_vec(&response).unwrap_or_default(),
+    ))
 }
 
 /// PATCH /v1/:collection/:id
@@ -471,6 +625,40 @@ async fn delete_doc(
     }
 }
 
+/// POST /v1/_indexes/:collection
+/// Creates a secondary index for the collection based on the field provided in the JSON body.
+async fn create_index(
+    State(state): State<AppState>,
+    Path(collection): Path<String>,
+    axum::Json(payload): axum::Json<serde_json::Value>,
+) -> Result<impl IntoResponse, StatusCode> {
+    let field = payload
+        .get("field")
+        .and_then(|v| v.as_str())
+        .ok_or(StatusCode::BAD_REQUEST)?;
+
+    state.engine.create_index(&collection, field).map_err(|e| {
+        tracing::error!("create_index failed: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    Ok(StatusCode::CREATED)
+}
+
+/// DELETE /v1/_indexes/:collection/:field
+/// Drops a secondary index natively from the core engine.
+async fn drop_index(
+    State(state): State<AppState>,
+    Path((collection, field)): Path<(String, String)>,
+) -> Result<impl IntoResponse, StatusCode> {
+    state.engine.drop_index(&collection, &field).map_err(|e| {
+        tracing::error!("drop_index failed: {}", e);
+        StatusCode::INTERNAL_SERVER_ERROR
+    })?;
+
+    Ok(StatusCode::NO_CONTENT)
+}
+
 /// Serve a single TLS stream using hyper-util and the axum router.
 pub async fn serve_connection(
     stream: tokio_rustls::server::TlsStream<tokio::net::TcpStream>,
diff --git a/crates/storage/Cargo.toml b/crates/storage/Cargo.toml
index c2240db..fd17f5e 100644
--- a/crates/storage/Cargo.toml
+++ b/crates/storage/Cargo.toml
@@ -15,6 +15,8 @@ uuid = { workspace = true }
 tracing = { workspace = true }
 bytes = "1.5"
 tokio = { workspace = true }
+rmp = "0.8.15"
 
 [dev-dependencies]
+serde_json.workspace = true
 tempfile = { workspace = true }
diff --git a/crates/storage/src/engine.rs b/crates/storage/src/engine.rs
index cf032ad..6e54bf5 100644
--- a/crates/storage/src/engine.rs
+++ b/crates/storage/src/engine.rs
@@ -112,13 +112,52 @@ impl StorageEngine {
         Ok(Self { db })
     }
 
+    /// Helper to fetch indexed fields for a collection within an active write transaction.
+    /// This avoids reopening the table for every document in a batch.
+    fn get_indexed_fields(txn: &redbx::WriteTransaction, collection: &str) -> Vec<String> {
+        let registry_def = TableDefinition::<&str, &[u8]>::new(crate::index::INDEX_REGISTRY_TABLE);
+        if let Ok(reg_table) = txn.open_table(registry_def)
+            && let Ok(Some(bytes)) = reg_table.get(collection)
+                && let Ok(registry) =
+                    crate::deserialize_doc::<crate::index::IndexedFields>(bytes.value())
+                {
+                    return registry.fields;
+                }
+        Vec::new()
+    }
+
     /// Insert a document into a collection. Overwrites if the key already exists.
     ///
     /// One write transaction per call — fully durable on return.
     /// For bulk imports, use [`insert_batch`] instead.
     pub fn insert(&self, collection: &str, id: &str, doc: &[u8]) -> Result<()> {
-        let table_def: TableDefinition<&str, &[u8]> = TableDefinition::new(collection);
+        let table_def = TableDefinition::<&str, &[u8]>::new(collection);
         let txn = self.db.begin_write().map_err(redbx::Error::from)?;
+
+        let indexed_fields = Self::get_indexed_fields(&txn, collection);
+        if !indexed_fields.is_empty() {
+            let old_doc = {
+                let table = txn.open_table(table_def).map_err(redbx::Error::from)?;
+                table.get(id).ok().flatten().map(|b| b.value().to_vec())
+            };
+
+            for field in &indexed_fields {
+                let idx_table_name = crate::index::index_table_name(collection, field);
+                let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+                if let Ok(mut idx_table) = txn.open_table(idx_def) {
+                    if let Some(old) = &old_doc
+                        && let Ok(Some(old_val)) = crate::extract::extract_field_raw(old, field) {
+                            let key = crate::index::format_index_key(old_val, id);
+                            let _ = idx_table.remove(key.as_slice());
+                        }
+                    if let Ok(Some(new_val)) = crate::extract::extract_field_raw(doc, field) {
+                        let key = crate::index::format_index_key(new_val, id);
+                        let _ = idx_table.insert(key.as_slice(), &[] as &[u8]);
+                    }
+                }
+            }
+        }
+
         {
             let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
             table.insert(id, doc).map_err(redbx::Error::from)?;
@@ -153,12 +192,38 @@ impl StorageEngine {
 
     /// Delete a document by ID. Returns `true` if the key existed.
     pub fn delete(&self, collection: &str, id: &str) -> Result<bool> {
-        let table_def: TableDefinition<&str, &[u8]> = TableDefinition::new(collection);
+        let table_def = TableDefinition::<&str, &[u8]>::new(collection);
         let txn = self.db.begin_write().map_err(redbx::Error::from)?;
-        let existed = {
+        let indexed_fields = Self::get_indexed_fields(&txn, collection);
+        let mut existed = false;
+
+        if !indexed_fields.is_empty() {
+            let old_doc = {
+                let table = txn.open_table(table_def).map_err(redbx::Error::from)?;
+                table.get(id).ok().flatten().map(|b| b.value().to_vec())
+            };
+            if let Some(old) = old_doc {
+                existed = true;
+                for field in &indexed_fields {
+                    let idx_table_name = crate::index::index_table_name(collection, field);
+                    let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+                    if let Ok(mut idx_table) = txn.open_table(idx_def)
+                        && let Ok(Some(old_val)) = crate::extract::extract_field_raw(&old, field) {
+                            let key = crate::index::format_index_key(old_val, id);
+                            let _ = idx_table.remove(key.as_slice());
+                        }
+                }
+            }
+        }
+
+        {
             let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
-            table.remove(id).map_err(redbx::Error::from)?.is_some()
-        };
+            if !existed {
+                existed = table.remove(id).map_err(redbx::Error::from)?.is_some();
+            } else {
+                let _ = table.remove(id).map_err(redbx::Error::from)?;
+            }
+        }
         txn.commit().map_err(redbx::Error::from)?;
         Ok(existed)
     }
@@ -182,13 +247,41 @@ impl StorageEngine {
         docs: &[(&str, &[u8])],
         flush: bool,
     ) -> Result<()> {
-        let table_def: TableDefinition<&str, &[u8]> = TableDefinition::new(collection);
+        let table_def = TableDefinition::<&str, &[u8]>::new(collection);
         let mut txn = self.db.begin_write().map_err(redbx::Error::from)?;
 
         if !flush {
             let _ = txn.set_durability(Durability::None);
         }
 
+        let indexed_fields = Self::get_indexed_fields(&txn, collection);
+        for &(id, payload) in docs {
+            if !indexed_fields.is_empty() {
+                let old_doc = {
+                    let table = txn.open_table(table_def).map_err(redbx::Error::from)?;
+                    table.get(id).ok().flatten().map(|b| b.value().to_vec())
+                };
+
+                for field in &indexed_fields {
+                    let idx_table_name = crate::index::index_table_name(collection, field);
+                    let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+                    if let Ok(mut idx_table) = txn.open_table(idx_def) {
+                        if let Some(old) = &old_doc
+                            && let Ok(Some(old_val)) = crate::extract::extract_field_raw(old, field)
+                            {
+                                let key = crate::index::format_index_key(old_val, id);
+                                let _ = idx_table.remove(key.as_slice());
+                            }
+                        if let Ok(Some(new_val)) = crate::extract::extract_field_raw(payload, field)
+                        {
+                            let key = crate::index::format_index_key(new_val, id);
+                            let _ = idx_table.insert(key.as_slice(), &[] as &[u8]);
+                        }
+                    }
+                }
+            }
+        }
+
         {
             let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
             for (id, payload) in docs {
@@ -207,13 +300,40 @@ impl StorageEngine {
     ///
     /// Returns [`ForgeError::Storage`] if the transaction or any individual removal fails.
     pub fn delete_batch(&self, collection: &str, ids: &[String], flush: bool) -> Result<()> {
-        let table_def: TableDefinition<&str, &[u8]> = TableDefinition::new(collection);
+        let table_def = TableDefinition::<&str, &[u8]>::new(collection);
         let mut txn = self.db.begin_write().map_err(redbx::Error::from)?;
 
         if !flush {
             let _ = txn.set_durability(Durability::None);
         }
 
+        let indexed_fields = Self::get_indexed_fields(&txn, collection);
+        for id in ids {
+            if !indexed_fields.is_empty() {
+                let old_doc = {
+                    let table = txn.open_table(table_def).map_err(redbx::Error::from)?;
+                    table
+                        .get(id.as_str())
+                        .ok()
+                        .flatten()
+                        .map(|b| b.value().to_vec())
+                };
+                if let Some(old) = old_doc {
+                    for field in &indexed_fields {
+                        let idx_table_name = crate::index::index_table_name(collection, field);
+                        let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+                        if let Ok(mut idx_table) = txn.open_table(idx_def)
+                            && let Ok(Some(old_val)) =
+                                crate::extract::extract_field_raw(&old, field)
+                            {
+                                let key = crate::index::format_index_key(old_val, id.as_str());
+                                let _ = idx_table.remove(key.as_slice());
+                            }
+                    }
+                }
+            }
+        }
+
         {
             let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
             for id in ids {
@@ -224,6 +344,137 @@ impl StorageEngine {
         Ok(())
     }
 
+    /// Creates a new secondary index for a collection and backfills existing documents.
+    pub fn create_index(&self, collection: &str, field: &str) -> Result<()> {
+        let registry = crate::index::IndexRegistry::new(&self.db);
+        registry.create_index(collection, field)?;
+
+        let table_def = TableDefinition::<&str, &[u8]>::new(collection);
+        let idx_table_name = crate::index::index_table_name(collection, field);
+        let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+
+        let txn = self.db.begin_write().map_err(redbx::Error::from)?;
+        {
+            if let Ok(table) = txn.open_table(table_def) {
+                let mut idx_table = txn.open_table(idx_def).map_err(redbx::Error::from)?;
+                let iter = table.iter().map_err(redbx::Error::from)?;
+                for entry in iter {
+                    let (k, v) = entry.map_err(redbx::Error::from)?;
+                    let doc_id = k.value();
+                    let payload = v.value();
+
+                    if let Ok(Some(val)) = crate::extract::extract_field_raw(payload, field) {
+                        let key = crate::index::format_index_key(val, doc_id);
+                        let _ = idx_table.insert(key.as_slice(), &[] as &[u8]);
+                    }
+                }
+            }
+        }
+        txn.commit().map_err(redbx::Error::from)?;
+        Ok(())
+    }
+
+    /// Drops a secondary index and clears its backing table.
+    pub fn drop_index(&self, collection: &str, field: &str) -> Result<()> {
+        let registry = crate::index::IndexRegistry::new(&self.db);
+        registry.drop_index(collection, field)?;
+
+        let idx_table_name = crate::index::index_table_name(collection, field);
+        let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+
+        let txn = self.db.begin_write().map_err(redbx::Error::from)?;
+        {
+            if let Ok(mut idx_table) = txn.open_table(idx_def) {
+                let mut keys = Vec::new();
+                if let Ok(iter) = idx_table.iter() {
+                    for (k, _) in iter.flatten() {
+                        keys.push(k.value().to_vec());
+                    }
+                }
+                for k in keys {
+                    let _ = idx_table.remove(k.as_slice());
+                }
+            }
+        }
+        txn.commit().map_err(redbx::Error::from)?;
+        Ok(())
+    }
+
+    /// Look up documents using a secondary index, with pagination.
+    pub fn lookup_by_index(
+        &self,
+        collection: &str,
+        field: &str,
+        value: &[u8],
+        cursor: Option<&str>,
+        limit: usize,
+    ) -> Result<PaginatedList> {
+        let idx_table_name = crate::index::index_table_name(collection, field);
+        let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+        let coll_def = TableDefinition::<&str, &[u8]>::new(collection);
+
+        let txn = self.db.begin_read().map_err(redbx::Error::from)?;
+
+        let idx_table = match txn.open_table(idx_def) {
+            Ok(t) => t,
+            Err(_) => return Ok((Vec::new(), None)),
+        };
+        let coll_table = match txn.open_table(coll_def) {
+            Ok(t) => t,
+            Err(_) => return Ok((Vec::new(), None)),
+        };
+
+        let mut prefix = Vec::with_capacity(value.len() + 1);
+        prefix.extend_from_slice(value);
+        prefix.push(0);
+
+        let mut results = Vec::with_capacity(limit + 1);
+        let mut next_cursor = None;
+
+        let cursor_key;
+        let start_bound = if let Some(c) = cursor {
+            cursor_key = crate::index::format_index_key(value, c);
+            std::ops::Bound::Excluded(cursor_key.as_slice())
+        } else {
+            std::ops::Bound::Included(prefix.as_slice())
+        };
+
+        let iter = idx_table
+            .range::<&[u8]>((start_bound, std::ops::Bound::Unbounded))
+            .map_err(redbx::Error::from)?;
+
+        for entry in iter {
+            let (k, _) = entry.map_err(redbx::Error::from)?;
+            let key_bytes = k.value();
+
+            if !key_bytes.starts_with(&prefix) {
+                break;
+            }
+
+            let doc_id_bytes = &key_bytes[prefix.len()..];
+            let id = std::str::from_utf8(doc_id_bytes).map_err(|e| {
+                ForgeError::Storage(redbx::Error::Io(std::io::Error::new(
+                    std::io::ErrorKind::InvalidData,
+                    e,
+                )))
+            })?;
+
+            if let Ok(Some(doc_bytes)) = coll_table.get(id) {
+                results.push((id.to_string(), Bytes::copy_from_slice(doc_bytes.value())));
+                if results.len() > limit {
+                    break;
+                }
+            }
+        }
+
+        if results.len() > limit {
+            results.pop();
+            next_cursor = results.last().map(|(k, _)| k.clone());
+        }
+
+        Ok((results, next_cursor))
+    }
+
     /// Force a durable flush to disk. Use after a sequence of non-flushing batch operations.
     ///
     /// Commits an empty write transaction with `Durability::Immediate`, which triggers
@@ -343,10 +594,10 @@ impl StorageEngine {
         patch: &[u8],
         merge_fn: impl Fn(&[u8], &[u8]) -> Result<Vec<u8>>,
     ) -> Result<Vec<u8>> {
-        let table_def: TableDefinition<&str, &[u8]> = TableDefinition::new(collection);
+        let table_def = TableDefinition::<&str, &[u8]>::new(collection);
         let txn = self.db.begin_write().map_err(redbx::Error::from)?;
 
-        let merged = {
+        let (existing_bytes, merged) = {
             let table = txn.open_table(table_def).map_err(redbx::Error::from)?;
 
             let existing = table.get(id).map_err(redbx::Error::from)?.ok_or_else(|| {
@@ -356,9 +607,31 @@ impl StorageEngine {
                 )))
             })?;
 
-            merge_fn(existing.value(), patch)?
+            let existing_bytes = existing.value().to_vec();
+            let merged = merge_fn(&existing_bytes, patch)?;
+            (existing_bytes, merged)
         };
 
+        let indexed_fields = Self::get_indexed_fields(&txn, collection);
+        if !indexed_fields.is_empty() {
+            for field in &indexed_fields {
+                let idx_table_name = crate::index::index_table_name(collection, field);
+                let idx_def = TableDefinition::<&[u8], &[u8]>::new(&idx_table_name);
+                if let Ok(mut idx_table) = txn.open_table(idx_def) {
+                    if let Ok(Some(old_val)) =
+                        crate::extract::extract_field_raw(&existing_bytes, field)
+                    {
+                        let key = crate::index::format_index_key(old_val, id);
+                        let _ = idx_table.remove(key.as_slice());
+                    }
+                    if let Ok(Some(new_val)) = crate::extract::extract_field_raw(&merged, field) {
+                        let key = crate::index::format_index_key(new_val, id);
+                        let _ = idx_table.insert(key.as_slice(), &[] as &[u8]);
+                    }
+                }
+            }
+        }
+
         // We re-open the table block to bypass borrow-checker holding `existing`
         {
             let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
@@ -429,6 +702,64 @@ mod tests {
         assert_eq!(docs[1], ("b".into(), Bytes::from_static(b"two")));
     }
 
+    #[test]
+    fn secondary_indexes_maintain_sync_and_query() {
+        let (engine, _tmp) = test_engine();
+
+        // 1. Insert documents before index exists
+        let doc1 =
+            rmp_serde::to_vec_named(&serde_json::json!({"name": "Alice", "age": 30})).unwrap();
+        let doc2 = rmp_serde::to_vec_named(&serde_json::json!({"name": "Bob", "age": 30})).unwrap();
+        let doc3 =
+            rmp_serde::to_vec_named(&serde_json::json!({"name": "Charlie", "age": 25})).unwrap();
+
+        engine.insert("users", "u1", &doc1).unwrap();
+        engine.insert("users", "u2", &doc2).unwrap();
+        engine.insert("users", "u3", &doc3).unwrap();
+
+        // 2. Create index on "age". This should backfill.
+        engine.create_index("users", "age").unwrap();
+
+        let age_30_msgpack = rmp_serde::to_vec_named(&30).unwrap();
+        let (res, _) = engine
+            .lookup_by_index("users", "age", &age_30_msgpack, None, 10)
+            .unwrap();
+        assert_eq!(res.len(), 2);
+
+        // 3. Insert after index exists
+        let doc4 =
+            rmp_serde::to_vec_named(&serde_json::json!({"name": "Dave", "age": 30})).unwrap();
+        engine.insert("users", "u4", &doc4).unwrap();
+
+        let (res, _) = engine
+            .lookup_by_index("users", "age", &age_30_msgpack, None, 10)
+            .unwrap();
+        assert_eq!(res.len(), 3);
+
+        // 4. Update document (change age 30 -> 25)
+        engine
+            .update_doc("users", "u1", &[], |_, _| Ok(doc3.clone()))
+            .unwrap(); // u1 now has age 25
+
+        let (res, _) = engine
+            .lookup_by_index("users", "age", &age_30_msgpack, None, 10)
+            .unwrap();
+        assert_eq!(res.len(), 2); // u2, u4
+
+        let age_25_msgpack = rmp_serde::to_vec_named(&25).unwrap();
+        let (res, _) = engine
+            .lookup_by_index("users", "age", &age_25_msgpack, None, 10)
+            .unwrap();
+        assert_eq!(res.len(), 2); // u3, u1
+
+        // 5. Delete document
+        engine.delete("users", "u2").unwrap();
+        let (res, _) = engine
+            .lookup_by_index("users", "age", &age_30_msgpack, None, 10)
+            .unwrap();
+        assert_eq!(res.len(), 1); // only u4 left
+    }
+
     #[test]
     fn list_empty_collection() {
         let (engine, _tmp) = test_engine();
diff --git a/crates/storage/src/extract.rs b/crates/storage/src/extract.rs
new file mode 100644
index 0000000..b1d30a9
--- /dev/null
+++ b/crates/storage/src/extract.rs
@@ -0,0 +1,254 @@
+use forge_types::{ForgeError, Result};
+use rmp::Marker;
+use std::io::{Cursor, Read};
+
+/// Extremely fast, zero-allocation MessagePack field extractor.
+/// Scans the binary payload, looking for the target map key, and returns
+/// the raw bytes of the associated value.
+pub fn extract_field_raw<'a>(doc: &'a [u8], field: &str) -> Result<Option<&'a [u8]>> {
+    let mut cursor = std::io::Cursor::new(doc);
+
+    let marker = rmp::decode::read_marker(&mut cursor)
+        .map_err(|e| ForgeError::Serialization(format!("failed to read marker: {:?}", e)))?;
+
+    let len = match marker {
+        Marker::FixMap(len) => len as u32,
+        Marker::Map16 => read_data_u16(&mut cursor)
+            .map_err(|_| ForgeError::Serialization("invalid map16".into()))?
+            as u32,
+        Marker::Map32 => read_data_u32(&mut cursor)
+            .map_err(|_| ForgeError::Serialization("invalid map32".into()))?,
+        _ => return Ok(None), // Not a map, can't extract fields
+    };
+
+    let target_bytes = field.as_bytes();
+
+    for _ in 0..len {
+        // Read key
+        let key_start = cursor.position() as usize;
+        skip_value(&mut cursor)?;
+        let key_end = cursor.position() as usize;
+
+        let key_slice = &doc[key_start..key_end];
+        let mut key_cursor = std::io::Cursor::new(key_slice);
+
+        let mut is_match = false;
+        if let Ok(str_len) = rmp::decode::read_str_len(&mut key_cursor) {
+            let str_start = key_start + key_cursor.position() as usize;
+            let str_end = str_start + str_len as usize;
+            if str_end <= doc.len() && &doc[str_start..str_end] == target_bytes {
+                is_match = true;
+            }
+        }
+
+        // Read value bounds
+        let val_start = cursor.position() as usize;
+        skip_value(&mut cursor)?;
+        let val_end = cursor.position() as usize;
+
+        if is_match {
+            return Ok(Some(&doc[val_start..val_end]));
+        }
+    }
+
+    Ok(None)
+}
+
+fn read_data_u8(cursor: &mut Cursor<&[u8]>) -> std::result::Result<u8, std::io::Error> {
+    let mut buf = [0u8; 1];
+    cursor.read_exact(&mut buf)?;
+    Ok(buf[0])
+}
+
+fn read_data_u16(cursor: &mut Cursor<&[u8]>) -> std::result::Result<u16, std::io::Error> {
+    let mut buf = [0u8; 2];
+    cursor.read_exact(&mut buf)?;
+    Ok(u16::from_be_bytes(buf))
+}
+
+fn read_data_u32(cursor: &mut Cursor<&[u8]>) -> std::result::Result<u32, std::io::Error> {
+    let mut buf = [0u8; 4];
+    cursor.read_exact(&mut buf)?;
+    Ok(u32::from_be_bytes(buf))
+}
+
+/// Recursively skips a single MessagePack value in the cursor.
+fn skip_value(cursor: &mut Cursor<&[u8]>) -> Result<()> {
+    let marker = rmp::decode::read_marker(cursor)
+        .map_err(|_| ForgeError::Serialization("EOF during skip".into()))?;
+
+    match marker {
+        Marker::FixPos(_) | Marker::FixNeg(_) | Marker::Null | Marker::True | Marker::False => {
+            Ok(())
+        }
+        Marker::U8 | Marker::I8 => {
+            cursor.set_position(cursor.position() + 1);
+            Ok(())
+        }
+        Marker::U16 | Marker::I16 => {
+            cursor.set_position(cursor.position() + 2);
+            Ok(())
+        }
+        Marker::U32 | Marker::I32 | Marker::F32 => {
+            cursor.set_position(cursor.position() + 4);
+            Ok(())
+        }
+        Marker::U64 | Marker::I64 | Marker::F64 => {
+            cursor.set_position(cursor.position() + 8);
+            Ok(())
+        }
+        Marker::FixStr(len) => {
+            cursor.set_position(cursor.position() + len as u64);
+            Ok(())
+        }
+        Marker::Str8 | Marker::Bin8 => {
+            let len = read_data_u8(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            cursor.set_position(cursor.position() + len as u64);
+            Ok(())
+        }
+        Marker::Str16 | Marker::Bin16 => {
+            let len = read_data_u16(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            cursor.set_position(cursor.position() + len as u64);
+            Ok(())
+        }
+        Marker::Str32 | Marker::Bin32 => {
+            let len = read_data_u32(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            cursor.set_position(cursor.position() + len as u64);
+            Ok(())
+        }
+        Marker::FixArray(len) => {
+            for _ in 0..len {
+                skip_value(cursor)?;
+            }
+            Ok(())
+        }
+        Marker::Array16 => {
+            let len = read_data_u16(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            for _ in 0..len {
+                skip_value(cursor)?;
+            }
+            Ok(())
+        }
+        Marker::Array32 => {
+            let len = read_data_u32(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            for _ in 0..len {
+                skip_value(cursor)?;
+            }
+            Ok(())
+        }
+        Marker::FixMap(len) => {
+            for _ in 0..len * 2 {
+                skip_value(cursor)?;
+            }
+            Ok(())
+        }
+        Marker::Map16 => {
+            let len = read_data_u16(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            for _ in 0..len * 2 {
+                skip_value(cursor)?;
+            }
+            Ok(())
+        }
+        Marker::Map32 => {
+            let len = read_data_u32(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            for _ in 0..len * 2 {
+                skip_value(cursor)?;
+            }
+            Ok(())
+        }
+        Marker::FixExt1 => {
+            cursor.set_position(cursor.position() + 2);
+            Ok(())
+        }
+        Marker::FixExt2 => {
+            cursor.set_position(cursor.position() + 3);
+            Ok(())
+        }
+        Marker::FixExt4 => {
+            cursor.set_position(cursor.position() + 5);
+            Ok(())
+        }
+        Marker::FixExt8 => {
+            cursor.set_position(cursor.position() + 9);
+            Ok(())
+        }
+        Marker::FixExt16 => {
+            cursor.set_position(cursor.position() + 17);
+            Ok(())
+        }
+        Marker::Ext8 => {
+            let len = read_data_u8(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            cursor.set_position(cursor.position() + 1 + len as u64);
+            Ok(())
+        }
+        Marker::Ext16 => {
+            let len = read_data_u16(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            cursor.set_position(cursor.position() + 1 + len as u64);
+            Ok(())
+        }
+        Marker::Ext32 => {
+            let len = read_data_u32(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
+            cursor.set_position(cursor.position() + 1 + len as u64);
+            Ok(())
+        }
+        Marker::Reserved => Err(ForgeError::Serialization("Reserved marker".into())),
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use serde_json::json;
+
+    #[test]
+    fn extracts_field_from_msgpack() {
+        let doc = json!({
+            "id": "123",
+            "active": true,
+            "nested": [1, 2, 3],
+            "target": "found_me"
+        });
+
+        let bytes = rmp_serde::to_vec_named(&doc).unwrap();
+
+        let extracted = extract_field_raw(&bytes, "target")
+            .unwrap()
+            .expect("should find target");
+        let decoded: String = rmp_serde::from_slice(extracted).unwrap();
+
+        assert_eq!(decoded, "found_me");
+    }
+
+    #[test]
+    fn returns_none_if_missing() {
+        let doc = json!({
+            "id": "123",
+            "active": true
+        });
+
+        let bytes = rmp_serde::to_vec_named(&doc).unwrap();
+
+        let extracted = extract_field_raw(&bytes, "missing").unwrap();
+        assert!(extracted.is_none());
+    }
+
+    #[test]
+    fn ignores_nested_fields_with_same_name() {
+        let doc = json!({
+            "id": "123",
+            "nested": {
+                "target": "wrong"
+            },
+            "target": "right"
+        });
+
+        let bytes = rmp_serde::to_vec_named(&doc).unwrap();
+
+        let extracted = extract_field_raw(&bytes, "target")
+            .unwrap()
+            .expect("should find target");
+        let decoded: String = rmp_serde::from_slice(extracted).unwrap();
+
+        assert_eq!(decoded, "right"); // Top level only
+    }
+}
diff --git a/crates/storage/src/index.rs b/crates/storage/src/index.rs
new file mode 100644
index 0000000..f897a47
--- /dev/null
+++ b/crates/storage/src/index.rs
@@ -0,0 +1,132 @@
+use forge_types::{ForgeError, Result};
+use redbx::{Database, ReadableDatabase, TableDefinition};
+use serde::{Deserialize, Serialize};
+
+/// Internal table mapping collections to their indexed fields.
+pub(crate) const INDEX_REGISTRY_TABLE: &str = "_forge_indexes";
+
+/// Serialized list of indexed fields for a collection.
+#[derive(Debug, Serialize, Deserialize)]
+pub(crate) struct IndexedFields {
+    pub(crate) fields: Vec<String>,
+}
+
+/// Computes the internal table name for a given secondary index.
+pub fn index_table_name(collection: &str, field: &str) -> String {
+    format!("_idx_{}_{}", collection, field)
+}
+
+/// Formats an index key. To support non-unique indexes, we append the document ID
+/// to the field value, separated by a null byte `\0`.
+///
+/// Format: `[value_bytes]\0[doc_id_bytes]`
+pub fn format_index_key(value: &[u8], doc_id: &str) -> Vec<u8> {
+    let mut key = Vec::with_capacity(value.len() + 1 + doc_id.len());
+    key.extend_from_slice(value);
+    key.push(0);
+    key.extend_from_slice(doc_id.as_bytes());
+    key
+}
+
+/// Registry of secondary indexes, managing metadata and lifecycle.
+pub struct IndexRegistry<'a> {
+    db: &'a Database,
+}
+
+impl<'a> IndexRegistry<'a> {
+    pub fn new(db: &'a Database) -> Self {
+        Self { db }
+    }
+
+    /// Retrieves all indexed fields for a given collection.
+    pub fn list_indexes(&self, collection: &str) -> Result<Vec<String>> {
+        let table_def = TableDefinition::<&str, &[u8]>::new(INDEX_REGISTRY_TABLE);
+        let txn = self.db.begin_read().map_err(redbx::Error::from)?;
+
+        let table = match txn.open_table(table_def) {
+            Ok(t) => t,
+            Err(redbx::TableError::TableDoesNotExist(_)) => return Ok(Vec::new()),
+            Err(e) => return Err(ForgeError::Storage(e.into())),
+        };
+
+        match table.get(collection).map_err(redbx::Error::from)? {
+            Some(bytes) => {
+                let registry: IndexedFields = crate::deserialize_doc(bytes.value())?;
+                Ok(registry.fields)
+            }
+            None => Ok(Vec::new()),
+        }
+    }
+
+    /// Registers a new field to be indexed for a collection.
+    /// Note: This only updates metadata. The caller is responsible for backfilling
+    /// the index table via `engine::rebuild_index`.
+    pub fn create_index(&self, collection: &str, field: &str) -> Result<()> {
+        let mut fields = self.list_indexes(collection)?;
+        if fields.iter().any(|f| f == field) {
+            return Ok(()); // Already exists
+        }
+
+        fields.push(field.to_string());
+        let registry = IndexedFields { fields };
+        let bytes = crate::serialize_doc(&registry)?;
+
+        let table_def = TableDefinition::<&str, &[u8]>::new(INDEX_REGISTRY_TABLE);
+        let txn = self.db.begin_write().map_err(redbx::Error::from)?;
+
+        {
+            let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
+            table
+                .insert(collection, bytes.as_slice())
+                .map_err(redbx::Error::from)?;
+        }
+
+        txn.commit().map_err(redbx::Error::from)?;
+        Ok(())
+    }
+
+    /// Removes an index from the registry. The caller must drop the actual index table.
+    pub fn drop_index(&self, collection: &str, field: &str) -> Result<()> {
+        let mut fields = self.list_indexes(collection)?;
+        let initial_len = fields.len();
+        fields.retain(|f| f != field);
+
+        if fields.len() == initial_len {
+            return Ok(()); // Did not exist
+        }
+
+        let bytes = crate::serialize_doc(&IndexedFields { fields })?;
+        let table_def = TableDefinition::<&str, &[u8]>::new(INDEX_REGISTRY_TABLE);
+        let txn = self.db.begin_write().map_err(redbx::Error::from)?;
+
+        {
+            let mut table = txn.open_table(table_def).map_err(redbx::Error::from)?;
+            table
+                .insert(collection, bytes.as_slice())
+                .map_err(redbx::Error::from)?;
+        }
+
+        // Also instruct redbx to delete the backing index table if possible
+        // (redbx doesn't currently easily drop tables dynamically without iterating and deleting)
+        // We leave the orphaned table data around since it won't be queried, or the caller can manually empty it.
+
+        txn.commit().map_err(redbx::Error::from)?;
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn format_index_key_builds_correctly() {
+        let val = b"hello";
+        let doc_id = "doc-123";
+        let key = format_index_key(val, doc_id);
+
+        assert_eq!(&key[0..5], b"hello");
+        assert_eq!(key[5], 0);
+        assert_eq!(&key[6..], b"doc-123");
+    }
+}
diff --git a/crates/storage/src/lib.rs b/crates/storage/src/lib.rs
index 539c8e1..a9a7a2c 100644
--- a/crates/storage/src/lib.rs
+++ b/crates/storage/src/lib.rs
@@ -9,9 +9,12 @@
 pub mod audit;
 pub mod document;
 pub mod engine;
+pub mod extract;
+pub mod index;
 pub mod writer;
 
 pub use audit::AuditLog;
 pub use document::{deserialize_doc, serialize_doc};
 pub use engine::{StorageConfig, StorageEngine};
+pub use index::{IndexRegistry, format_index_key, index_table_name};
 pub use writer::{WriteSender, spawn_writer};
diff --git a/crates/types/Cargo.toml b/crates/types/Cargo.toml
index b88a50f..7ebd33c 100644
--- a/crates/types/Cargo.toml
+++ b/crates/types/Cargo.toml
@@ -11,6 +11,7 @@ thiserror = { workspace = true }
 serde = { workspace = true }
 redbx = { workspace = true }
 uuid = { workspace = true }
+serde_json.workspace = true
 
 [dev-dependencies]
 toml = { workspace = true }
diff --git a/crates/types/src/lib.rs b/crates/types/src/lib.rs
index 5ca69af..1762c85 100644
--- a/crates/types/src/lib.rs
+++ b/crates/types/src/lib.rs
@@ -8,11 +8,13 @@ pub mod audit;
 pub mod config;
 pub mod error;
 pub mod pagination;
+pub mod query;
 
 pub use audit::{AuditEntry, Outcome};
 pub use config::ForgeConfig;
 pub use error::ForgeError;
 pub use pagination::{PaginatedResponse, PaginationParams};
+pub use query::{JoinNode, JoinQuery};
 
 /// Shorthand for `std::result::Result<T, ForgeError>`.
 pub type Result<T> = std::result::Result<T, ForgeError>;
diff --git a/crates/types/src/pagination.rs b/crates/types/src/pagination.rs
index 4949889..87f83f8 100644
--- a/crates/types/src/pagination.rs
+++ b/crates/types/src/pagination.rs
@@ -13,6 +13,9 @@ pub struct PaginationParams {
     pub cursor: Option<String>,
     /// Maximum number of items to return. Defaults to 50 if missing.
     pub limit: Option<u32>,
+    /// Catch-all for extra query parameters (like `where[field]=value`).
+    #[serde(flatten)]
+    pub query_filters: std::collections::HashMap<String, String>,
 }
 
 impl Default for PaginationParams {
@@ -20,6 +23,7 @@ impl Default for PaginationParams {
         Self {
             cursor: None,
             limit: Some(50),
+            query_filters: std::collections::HashMap::new(),
         }
     }
 }
@@ -59,18 +63,21 @@ mod tests {
         let over = PaginationParams {
             cursor: None,
             limit: Some(5000),
+            query_filters: std::collections::HashMap::new(),
         };
         assert_eq!(over.resolved_limit(), 1000);
 
         let under = PaginationParams {
             cursor: None,
             limit: Some(0),
+            query_filters: std::collections::HashMap::new(),
         };
         assert_eq!(under.resolved_limit(), 1);
 
         let fine = PaginationParams {
             cursor: None,
             limit: Some(150),
+            query_filters: std::collections::HashMap::new(),
         };
         assert_eq!(fine.resolved_limit(), 150);
     }
diff --git a/crates/types/src/query.rs b/crates/types/src/query.rs
new file mode 100644
index 0000000..27376da
--- /dev/null
+++ b/crates/types/src/query.rs
@@ -0,0 +1,151 @@
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+/// Maximum allowed depth for relational joins to prevent combinatorial explosions
+/// and ensure predictable < 1ms response times.
+pub const MAX_JOIN_DEPTH: usize = 2;
+
+/// A graph node representing a relational join to another collection.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct JoinNode {
+    /// The target collection to join with.
+    pub collection: String,
+    /// The local field on the parent document. (e.g., `author_id`)
+    pub on: String,
+    /// The remote field on the target document. Typically `id`.
+    /// Due to our strict "fast path" architecture, joining on fields other
+    /// than `id` requires a secondary index on the target collection.
+    pub target: String,
+    /// Nested child joins. For example, joining `comments` onto `posts`,
+    /// then joining `author` onto each `comment`.
+    #[serde(default)]
+    pub joins: HashMap<String, JoinNode>,
+}
+
+impl JoinNode {
+    /// Validates that the recursive join structure does not exceed `MAX_JOIN_DEPTH`.
+    pub fn validate_depth(&self, current_depth: usize) -> Result<(), &'static str> {
+        if current_depth > MAX_JOIN_DEPTH {
+            return Err("Join depth exceeds the maximum allowed limit of 2");
+        }
+        for child in self.joins.values() {
+            child.validate_depth(current_depth + 1)?;
+        }
+        Ok(())
+    }
+}
+
+/// The root payload for the `POST /v1/_query` endpoint.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct JoinQuery {
+    /// The root collection to scan.
+    pub collection: String,
+    /// Exact match filters applied to the root collection to limit the working set.
+    #[serde(default, rename = "where")]
+    pub filter: HashMap<String, serde_json::Value>,
+    /// The relational tree of joins.
+    #[serde(default, rename = "join")]
+    pub joins: HashMap<String, JoinNode>,
+    /// The maximum number of root documents to return. Default 50.
+    pub limit: Option<u32>,
+    /// Keyset cursor for pagination over the root collection.
+    pub cursor: Option<String>,
+}
+
+impl JoinQuery {
+    /// Validates the query for safety limits, checking max depth.
+    pub fn validate(&self) -> Result<(), &'static str> {
+        for node in self.joins.values() {
+            node.validate_depth(1)?;
+        }
+        Ok(())
+    }
+
+    /// Resolves the requested root limits securely.
+    pub fn resolved_limit(&self) -> usize {
+        self.limit.unwrap_or(50).clamp(1, 1000) as usize
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn valid_join_depth_allowed() {
+        let mut child_join = HashMap::new();
+        child_join.insert(
+            "author".into(),
+            JoinNode {
+                collection: "users".into(),
+                on: "author_id".into(),
+                target: "id".into(),
+                joins: HashMap::new(),
+            },
+        );
+
+        let mut root_joins = HashMap::new();
+        root_joins.insert(
+            "comments".into(),
+            JoinNode {
+                collection: "comments".into(),
+                on: "id".into(),
+                target: "post_id".into(),
+                joins: child_join,
+            },
+        );
+
+        let query = JoinQuery {
+            collection: "posts".into(),
+            filter: HashMap::new(),
+            joins: root_joins,
+            limit: None,
+            cursor: None,
+        };
+
+        assert!(query.validate().is_ok());
+    }
+
+    #[test]
+    fn excessive_join_depth_rejected() {
+        let deep_node = JoinNode {
+            collection: "level3".into(),
+            on: "x".into(),
+            target: "id".into(),
+            joins: HashMap::new(),
+        };
+
+        let mut mid_map = HashMap::new();
+        mid_map.insert("l3".into(), deep_node);
+
+        let mid_node = JoinNode {
+            collection: "level2".into(),
+            on: "x".into(),
+            target: "id".into(),
+            joins: mid_map,
+        };
+
+        let mut top_map = HashMap::new();
+        top_map.insert("l2".into(), mid_node);
+
+        let top_node = JoinNode {
+            collection: "level1".into(),
+            on: "x".into(),
+            target: "id".into(),
+            joins: top_map,
+        };
+
+        let mut root_joins = HashMap::new();
+        root_joins.insert("l1".into(), top_node);
+
+        let query = JoinQuery {
+            collection: "root".into(),
+            filter: HashMap::new(),
+            joins: root_joins,
+            limit: None,
+            cursor: None,
+        };
+
+        assert!(query.validate().is_err());
+    }
+}

From db8ae35a4ffc6016bf362151cf95b92f2be7814c Mon Sep 17 00:00:00 2001
From: jonathanmagambo <jonathanomagambo@gmail.com>
Date: Sat, 7 Mar 2026 20:47:17 -0500
Subject: [PATCH 2/2] fix(server): resolve clippy warnings and finalize RLS
 pagination logic

---
 crates/server/src/lib.rs      | 337 ++++++++++++++++++++++++----------
 crates/storage/src/extract.rs |  43 ++++-
 2 files changed, 275 insertions(+), 105 deletions(-)

diff --git a/crates/server/src/lib.rs b/crates/server/src/lib.rs
index 765f0bf..5321c65 100644
--- a/crates/server/src/lib.rs
+++ b/crates/server/src/lib.rs
@@ -131,10 +131,11 @@ async fn list_docs(
     State(state): State<AppState>,
     Path(collection): Path<String>,
     axum::extract::Query(params): axum::extract::Query<forge_types::pagination::PaginationParams>,
+    axum::extract::Extension(claims): axum::extract::Extension<forge_auth::TokenClaims>,
     headers: axum::http::HeaderMap,
 ) -> Result<impl IntoResponse, StatusCode> {
     let limit = params.limit.unwrap_or(50).clamp(1, 100) as usize;
-    let cursor = params.cursor.as_deref();
+    let mut current_cursor = params.cursor.clone();
 
     let mut where_filter = None;
     for (k, v) in &params.query_filters {
@@ -145,88 +146,139 @@ async fn list_docs(
         }
     }
 
-    let query_result = match where_filter {
-        Some((field, val_str)) => {
-            let msgpack_val = match serde_json::from_str::<serde_json::Value>(val_str) {
-                Ok(j) => forge_storage::document::serialize_doc(&j).unwrap_or_default(),
-                Err(_) => forge_storage::document::serialize_doc(&serde_json::Value::String(
-                    val_str.to_string(),
-                ))
-                .unwrap_or_default(),
-            };
-            state
-                .engine
-                .lookup_by_index(&collection, field, &msgpack_val, cursor, limit)
-        }
-        None => state.engine.list_paginated(&collection, cursor, limit),
+    let msgpack_val = if let Some((_, val_str)) = where_filter {
+        let v = match serde_json::from_str::<serde_json::Value>(val_str) {
+            Ok(j) => forge_storage::document::serialize_doc(&j).unwrap_or_default(),
+            Err(_) => forge_storage::document::serialize_doc(&serde_json::Value::String(
+                val_str.to_string(),
+            ))
+            .unwrap_or_default(),
+        };
+        Some(v)
+    } else {
+        None
     };
 
-    match query_result {
-        Ok((docs, next_cursor)) => {
-            let accept = headers
-                .get(axum::http::header::ACCEPT)
-                .and_then(|h| h.to_str().ok())
-                .unwrap_or("");
+    let principal = &claims.sub;
+    let action = "Read";
+
+    let mut valid_docs = Vec::new();
+    let mut total_scanned = 0;
+    const MAX_SCAN_LIMIT: usize = 1000;
+    let mut last_scanned_id = None;
+
+    while valid_docs.len() < limit && total_scanned < MAX_SCAN_LIMIT {
+        let fetch_limit = std::cmp::min(MAX_SCAN_LIMIT - total_scanned, limit);
+
+        let query_result = match where_filter {
+            Some((field, _)) => state.engine.lookup_by_index(
+                &collection,
+                field,
+                msgpack_val.as_ref().unwrap(),
+                current_cursor.as_deref(),
+                fetch_limit,
+            ),
+            None => {
+                state
+                    .engine
+                    .list_paginated(&collection, current_cursor.as_deref(), fetch_limit)
+            }
+        }
+        .map_err(|e| {
+            tracing::error!("list_paginated failed: {e}");
+            StatusCode::INTERNAL_SERVER_ERROR
+        })?;
 
-            if accept.contains("application/json") {
-                let json_docs: Vec<serde_json::Value> = docs
-                    .into_iter()
-                    .map(|(id, bytes)| {
-                        let doc: serde_json::Value =
-                            rmp_serde::from_slice(&bytes).unwrap_or(serde_json::Value::Null);
-                        serde_json::json!({ "id": id, "doc": doc })
-                    })
-                    .collect();
-
-                let has_more = next_cursor.is_some();
-                let response = forge_types::pagination::PaginatedResponse {
-                    data: json_docs,
-                    next_cursor: next_cursor.clone(),
-                    has_more,
-                };
+        let (docs, next_cursor) = query_result;
+        let fetched_len = docs.len();
 
-                Ok((
-                    StatusCode::OK,
-                    [(axum::http::header::CONTENT_TYPE, "application/json")],
-                    serde_json::to_vec(&response).unwrap_or_default(),
-                )
-                    .into_response())
-            } else {
-                // For MessagePack, we deserialize the internal payload, wrap it,
-                // and pack it into a structured array inside a PaginatedResponse.
-                let mut wrapper = Vec::with_capacity(docs.len());
-                for (id, bytes) in docs {
-                    if let Ok(val) = rmp_serde::from_slice::<serde_json::Value>(&bytes) {
-                        wrapper.push(serde_json::json!({ "id": id, "doc": val }));
-                    }
-                }
+        if fetched_len == 0 {
+            break;
+        }
 
-                let has_more = next_cursor.is_some();
-                let response = forge_types::pagination::PaginatedResponse {
-                    data: wrapper,
-                    next_cursor,
-                    has_more,
-                };
+        total_scanned += fetched_len;
 
-                let resp_bytes =
-                    forge_storage::document::serialize_doc_named(&response).map_err(|e| {
-                        tracing::error!("failed to serialize paginated list to msgpack: {e}");
-                        StatusCode::INTERNAL_SERVER_ERROR
-                    })?;
+        for (id, bytes) in docs {
+            let resource = format!("{}/{}", collection, id);
+            let auth_ctx = forge_query::context::AuthContext::new(principal, action, &resource);
 
-                Ok((
-                    StatusCode::OK,
-                    [(axum::http::header::CONTENT_TYPE, "application/msgpack")],
-                    resp_bytes,
-                )
-                    .into_response())
+            if state.policy_engine.check_permit(&auth_ctx).is_ok() {
+                valid_docs.push((id.clone(), bytes));
+                if valid_docs.len() == limit {
+                    last_scanned_id = Some(id);
+                    break;
+                }
             }
+            last_scanned_id = Some(id);
         }
-        Err(e) => {
-            tracing::error!("list_paginated failed: {e}");
-            Err(StatusCode::INTERNAL_SERVER_ERROR)
+
+        current_cursor = next_cursor.clone();
+        if next_cursor.is_none() {
+            break;
         }
     }
+
+    let next_cursor = if valid_docs.len() == limit || current_cursor.is_some() {
+        last_scanned_id.or(current_cursor)
+    } else {
+        None
+    };
+
+    let accept = headers
+        .get(axum::http::header::ACCEPT)
+        .and_then(|h| h.to_str().ok())
+        .unwrap_or("");
+
+    if accept.contains("application/json") {
+        let json_docs: Vec<serde_json::Value> = valid_docs
+            .into_iter()
+            .map(|(id, bytes)| {
+                let doc: serde_json::Value =
+                    rmp_serde::from_slice(&bytes).unwrap_or(serde_json::Value::Null);
+                serde_json::json!({ "id": id, "doc": doc })
+            })
+            .collect();
+
+        let has_more = next_cursor.is_some();
+        let response = forge_types::pagination::PaginatedResponse {
+            data: json_docs,
+            next_cursor: next_cursor.clone(),
+            has_more,
+        };
+
+        Ok((
+            StatusCode::OK,
+            [(axum::http::header::CONTENT_TYPE, "application/json")],
+            serde_json::to_vec(&response).unwrap_or_default(),
+        )
+            .into_response())
+    } else {
+        let mut wrapper = Vec::with_capacity(valid_docs.len());
+        for (id, bytes) in valid_docs {
+            if let Ok(val) = rmp_serde::from_slice::<serde_json::Value>(&bytes) {
+                wrapper.push(serde_json::json!({ "id": id, "doc": val }));
+            }
+        }
+
+        let has_more = next_cursor.is_some();
+        let response = forge_types::pagination::PaginatedResponse {
+            data: wrapper,
+            next_cursor,
+            has_more,
+        };
+
+        let resp_bytes = forge_storage::document::serialize_doc_named(&response).map_err(|e| {
+            tracing::error!("failed to serialize paginated list to msgpack: {e}");
+            StatusCode::INTERNAL_SERVER_ERROR
+        })?;
+
+        Ok((
+            StatusCode::OK,
+            [(axum::http::header::CONTENT_TYPE, "application/msgpack")],
+            resp_bytes,
+        )
+            .into_response())
+    }
 }
 
 /// POST /v1/:collection
@@ -368,10 +420,18 @@ async fn get_doc(
 /// Helper function to execute local BFS N+1 joins directly on the B-Tree memory maps.
 fn process_joins(
     engine: &forge_storage::StorageEngine,
+    policy_engine: &forge_query::policy::PolicyEngine,
+    principal: &str,
     parent_docs: &mut [serde_json::Value],
     joins: &std::collections::HashMap<String, forge_types::query::JoinNode>,
 ) {
     for (join_key, node) in joins {
+        let coll_ctx = forge_query::context::AuthContext::new(principal, "Read", &node.collection);
+        if policy_engine.check_permit(&coll_ctx).is_err() {
+            tracing::warn!("Join denied at collection level: {}", node.collection);
+            continue;
+        }
+
         for parent in parent_docs.iter_mut() {
             let parent_obj = if let Some(o) = parent.as_object_mut() {
                 o
@@ -393,13 +453,21 @@ fn process_joins(
                 if node.target == "id" {
                     if let Some(target_id) = on_v.as_str()
                         && let Ok(Some(bytes)) = engine.get(&node.collection, target_id)
-                            && let Ok(doc) = rmp_serde::from_slice::<serde_json::Value>(&bytes) {
-                                joined_records.push(serde_json::json!({
-                                    "id": target_id,
-                                    "doc": doc,
-                                    "_joins": serde_json::json!({})
-                                }));
-                            }
+                        && let Ok(doc) = rmp_serde::from_slice::<serde_json::Value>(&bytes)
+                    {
+                        let doc_ctx = forge_query::context::AuthContext::new(
+                            principal,
+                            "Read",
+                            format!("{}/{}", node.collection, target_id),
+                        );
+                        if policy_engine.check_permit(&doc_ctx).is_ok() {
+                            joined_records.push(serde_json::json!({
+                                "id": target_id,
+                                "doc": doc,
+                                "_joins": serde_json::json!({})
+                            }));
+                        }
+                    }
                 } else {
                     let msgpack_val =
                         forge_storage::document::serialize_doc(&on_v).unwrap_or_default();
@@ -411,7 +479,15 @@ fn process_joins(
                         100,
                     ) {
                         for (j_id, j_bytes) in matches {
-                            if let Ok(doc) = rmp_serde::from_slice::<serde_json::Value>(&j_bytes) {
+                            let doc_ctx = forge_query::context::AuthContext::new(
+                                principal,
+                                "Read",
+                                format!("{}/{}", node.collection, j_id),
+                            );
+                            if policy_engine.check_permit(&doc_ctx).is_ok()
+                                && let Ok(doc) =
+                                    rmp_serde::from_slice::<serde_json::Value>(&j_bytes)
+                            {
                                 joined_records.push(serde_json::json!({
                                     "id": j_id,
                                     "doc": doc,
@@ -424,7 +500,13 @@ fn process_joins(
             }
 
             if !node.joins.is_empty() && !joined_records.is_empty() {
-                process_joins(engine, &mut joined_records, &node.joins);
+                process_joins(
+                    engine,
+                    policy_engine,
+                    principal,
+                    &mut joined_records,
+                    &node.joins,
+                );
             }
 
             if let Some(joins_map) = parent_obj.get_mut("_joins").and_then(|j| j.as_object_mut()) {
@@ -439,6 +521,7 @@ fn process_joins(
 /// nested and securely validated Join payloads in < 1ms without explicit SQL syntax.
 async fn query_docs(
     State(state): State<AppState>,
+    axum::extract::Extension(claims): axum::extract::Extension<forge_auth::TokenClaims>,
     axum::Json(query): axum::Json<forge_types::query::JoinQuery>,
 ) -> Result<impl IntoResponse, StatusCode> {
     if let Err(e) = query.validate() {
@@ -446,27 +529,83 @@ async fn query_docs(
         return Err(StatusCode::BAD_REQUEST);
     }
 
+    let principal = &claims.sub;
+    let action = "Read";
+
+    let root_ctx = forge_query::context::AuthContext::new(principal, action, &query.collection);
+    if state.policy_engine.check_permit(&root_ctx).is_err() {
+        tracing::warn!("Query denied at root collection: {}", query.collection);
+        return Err(StatusCode::FORBIDDEN);
+    }
+
     let limit = query.resolved_limit();
-    let (docs, next_cursor) = if let Some((k, v)) = query.filter.iter().next() {
-        let msgpack_val = forge_storage::document::serialize_doc(v).unwrap_or_default();
-        state
-            .engine
-            .lookup_by_index(
+    let mut current_cursor = query.cursor.clone();
+
+    let mut valid_docs = Vec::new();
+    let mut total_scanned = 0;
+    const MAX_SCAN_LIMIT: usize = 1000;
+    let mut last_scanned_id = None;
+
+    let msgpack_val = if let Some((_, v)) = query.filter.iter().next() {
+        Some(forge_storage::document::serialize_doc(v).unwrap_or_default())
+    } else {
+        None
+    };
+
+    while valid_docs.len() < limit && total_scanned < MAX_SCAN_LIMIT {
+        let fetch_limit = std::cmp::min(MAX_SCAN_LIMIT - total_scanned, limit);
+
+        let query_result = if let Some((k, _)) = query.filter.iter().next() {
+            state.engine.lookup_by_index(
                 &query.collection,
                 k,
-                &msgpack_val,
-                query.cursor.as_deref(),
-                limit,
+                msgpack_val.as_ref().unwrap(),
+                current_cursor.as_deref(),
+                fetch_limit,
             )
-            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
+        } else {
+            state
+                .engine
+                .list_paginated(&query.collection, current_cursor.as_deref(), fetch_limit)
+        }
+        .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?;
+
+        let (docs, next_cursor) = query_result;
+        let fetched_len = docs.len();
+
+        if fetched_len == 0 {
+            break;
+        }
+
+        total_scanned += fetched_len;
+
+        for (id, bytes) in docs {
+            let resource = format!("{}/{}", query.collection, id);
+            let auth_ctx = forge_query::context::AuthContext::new(principal, action, &resource);
+
+            if state.policy_engine.check_permit(&auth_ctx).is_ok() {
+                valid_docs.push((id.clone(), bytes));
+                if valid_docs.len() == limit {
+                    last_scanned_id = Some(id);
+                    break;
+                }
+            }
+            last_scanned_id = Some(id);
+        }
+
+        current_cursor = next_cursor.clone();
+        if next_cursor.is_none() {
+            break;
+        }
+    }
+
+    let next_cursor = if valid_docs.len() == limit || current_cursor.is_some() {
+        last_scanned_id.or(current_cursor)
     } else {
-        state
-            .engine
-            .list_paginated(&query.collection, query.cursor.as_deref(), limit)
-            .map_err(|_| StatusCode::INTERNAL_SERVER_ERROR)?
+        None
     };
 
-    let mut root_docs: Vec<serde_json::Value> = docs
+    let mut root_docs: Vec<serde_json::Value> = valid_docs
         .into_iter()
         .map(|(id, bytes)| {
             let doc: serde_json::Value =
@@ -479,7 +618,13 @@ async fn query_docs(
         })
         .collect();
 
-    process_joins(&state.engine, &mut root_docs, &query.joins);
+    process_joins(
+        &state.engine,
+        &state.policy_engine,
+        principal,
+        &mut root_docs,
+        &query.joins,
+    );
 
     let has_more = next_cursor.is_some();
     let response = forge_types::pagination::PaginatedResponse {
diff --git a/crates/storage/src/extract.rs b/crates/storage/src/extract.rs
index b1d30a9..c3381cb 100644
--- a/crates/storage/src/extract.rs
+++ b/crates/storage/src/extract.rs
@@ -26,7 +26,7 @@ pub fn extract_field_raw<'a>(doc: &'a [u8], field: &str) -> Result<Option<&'a [u
     for _ in 0..len {
         // Read key
         let key_start = cursor.position() as usize;
-        skip_value(&mut cursor)?;
+        skip_value(&mut cursor, 0)?;
         let key_end = cursor.position() as usize;
 
         let key_slice = &doc[key_start..key_end];
@@ -43,7 +43,7 @@ pub fn extract_field_raw<'a>(doc: &'a [u8], field: &str) -> Result<Option<&'a [u
 
         // Read value bounds
         let val_start = cursor.position() as usize;
-        skip_value(&mut cursor)?;
+        skip_value(&mut cursor, 0)?;
         let val_end = cursor.position() as usize;
 
         if is_match {
@@ -73,7 +73,11 @@ fn read_data_u32(cursor: &mut Cursor<&[u8]>) -> std::result::Result<u32, std::io
 }
 
 /// Recursively skips a single MessagePack value in the cursor.
-fn skip_value(cursor: &mut Cursor<&[u8]>) -> Result<()> {
+fn skip_value(cursor: &mut Cursor<&[u8]>, depth: u32) -> Result<()> {
+    if depth > 64 {
+        return Err(ForgeError::Serialization("depth limit exceeded".into()));
+    }
+
     let marker = rmp::decode::read_marker(cursor)
         .map_err(|_| ForgeError::Serialization("EOF during skip".into()))?;
 
@@ -118,41 +122,41 @@ fn skip_value(cursor: &mut Cursor<&[u8]>) -> Result<()> {
         }
         Marker::FixArray(len) => {
             for _ in 0..len {
-                skip_value(cursor)?;
+                skip_value(cursor, depth + 1)?;
             }
             Ok(())
         }
         Marker::Array16 => {
             let len = read_data_u16(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
             for _ in 0..len {
-                skip_value(cursor)?;
+                skip_value(cursor, depth + 1)?;
             }
             Ok(())
         }
         Marker::Array32 => {
             let len = read_data_u32(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
             for _ in 0..len {
-                skip_value(cursor)?;
+                skip_value(cursor, depth + 1)?;
             }
             Ok(())
         }
         Marker::FixMap(len) => {
             for _ in 0..len * 2 {
-                skip_value(cursor)?;
+                skip_value(cursor, depth + 1)?;
             }
             Ok(())
         }
         Marker::Map16 => {
             let len = read_data_u16(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
             for _ in 0..len * 2 {
-                skip_value(cursor)?;
+                skip_value(cursor, depth + 1)?;
             }
             Ok(())
         }
         Marker::Map32 => {
             let len = read_data_u32(cursor).map_err(|_| ForgeError::Serialization("EOF".into()))?;
             for _ in 0..len * 2 {
-                skip_value(cursor)?;
+                skip_value(cursor, depth + 1)?;
             }
             Ok(())
         }
@@ -251,4 +255,25 @@ mod tests {
 
         assert_eq!(decoded, "right"); // Top level only
     }
+
+    #[test]
+    fn limits_recursion_depth_to_prevent_stack_overflow() {
+        // Create 100 nested arrays: [[[[...]]]]
+        let mut bytes = Vec::new();
+        for _ in 0..100 {
+            rmp::encode::write_array_len(&mut bytes, 1).unwrap();
+        }
+        rmp::encode::write_str(&mut bytes, "deep_value").unwrap();
+
+        // The document must be a map at the root for extract_field_raw
+        let mut doc = Vec::new();
+        rmp::encode::write_map_len(&mut doc, 1).unwrap();
+        rmp::encode::write_str(&mut doc, "nested_array").unwrap();
+        doc.extend(bytes);
+
+        let res = extract_field_raw(&doc, "missing");
+        assert!(
+            matches!(res, Err(ForgeError::Serialization(msg)) if msg.contains("depth limit exceeded"))
+        );
+    }
 }