diff --git a/Cargo.lock b/Cargo.lock index 20adc322..760a55c6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1178,6 +1178,15 @@ dependencies = [ "syn 2.0.108", ] +[[package]] +name = "eql-types" +version = "0.1.0" +dependencies = [ + "eql-scalars", + "serde", + "serde_json", +] + [[package]] name = "eql_tests" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 6e0b6760..2baf1e5b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,10 @@ # crates/eql-codegen — the SQL generator binary (stub here; Plan 2 fills it in). # crates/eql-tests-macros — proc-macros expanding the single scalar-harness # list into the per-type SQLx-matrix wiring. +# crates/eql-types — canonical Rust wire types for EQL payloads, parity- +# tested against the eql-scalars catalog. (TypeScript +# bindings and JSON Schemas are generated from these +# types in stacked changes.) # tests/sqlx — the existing `eql_tests` SQLx integration crate. # # resolver = "2" keeps the heavy test-crate feature set (sqlx/tokio/cipherstash- @@ -20,6 +24,7 @@ members = [ "crates/eql-scalars", "crates/eql-codegen", "crates/eql-tests-macros", + "crates/eql-types", "tests/sqlx", ] default-members = ["tests/sqlx"] diff --git a/crates/eql-codegen/src/consts.rs b/crates/eql-codegen/src/consts.rs index 9266882f..a3bccf3c 100644 --- a/crates/eql-codegen/src/consts.rs +++ b/crates/eql-codegen/src/consts.rs @@ -11,10 +11,11 @@ pub(crate) const AUTO_GENERATED_HEADER: &str = "-- AUTOMATICALLY GENERATED FILE. /// the core types at. pub(crate) const SCHEMA: &str = "eql_v3"; -/// Always-present payload keys checked for presence in every domain CHECK, in -/// order: envelope version (`v`), ident (`i`), ciphertext (`c`). Term-specific -/// keys are appended after these by `context::domain_block`. -pub(crate) const ENVELOPE_KEYS: &[&str] = &["v", "i", "c"]; +/// Always-present payload keys checked for presence in every domain CHECK. +/// Term-specific keys are appended after these by `context::domain_block`. 
+/// Defined in the catalog (`eql_scalars::ENVELOPE_KEYS`) so the CHECKs and +/// the `eql-types` payload structs share one envelope definition. +pub(crate) const ENVELOPE_KEYS: &[&str] = eql_scalars::ENVELOPE_KEYS; /// Escape a string for use inside a single-quoted SQL literal by doubling /// embedded single quotes. diff --git a/crates/eql-scalars/src/lib.rs b/crates/eql-scalars/src/lib.rs index d3d055e1..8cbabcd8 100644 --- a/crates/eql-scalars/src/lib.rs +++ b/crates/eql-scalars/src/lib.rs @@ -73,6 +73,17 @@ pub enum ScalarKind { Timestamptz, } +/// Always-present payload keys required by every generated domain CHECK, +/// before the domain's term keys, in order: envelope version (`v`), ident +/// (`i`), ciphertext (`c`). +/// +/// Lives here — in the catalog — because it is cross-schema contract data +/// consumed on both sides of the generated surface: `eql-codegen` builds +/// every domain CHECK from it, and `eql-types` builds its payload structs +/// and parity tests against it. One definition, so the envelope cannot +/// drift between the SQL and the canonical types. +pub const ENVELOPE_KEYS: &[&str] = &["v", "i", "c"]; + /// A fixed index term known to the scalar materializer. /// /// `Hm` provides equality; `Ore` provides equality plus ordering. The diff --git a/crates/eql-types/.gitignore b/crates/eql-types/.gitignore new file mode 100644 index 00000000..4fffb2f8 --- /dev/null +++ b/crates/eql-types/.gitignore @@ -0,0 +1,2 @@ +/target +/Cargo.lock diff --git a/crates/eql-types/Cargo.toml b/crates/eql-types/Cargo.toml new file mode 100644 index 00000000..d01b6a0a --- /dev/null +++ b/crates/eql-types/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "eql-types" +version = "0.1.0" +edition = "2021" +description = "Canonical wire types for EQL payloads — the single Rust source of truth (TypeScript bindings and JSON Schemas are generated from these types in stacked changes)." 
+ +[dependencies] +serde = { version = "1", features = ["derive"] } + +[dev-dependencies] +# Parity oracle: tests/catalog_parity.rs asserts the v3 domain inventory +# exactly covers eql_scalars::CATALOG, so the types here cannot drift from +# the generated SQL surface. +eql-scalars = { path = "../eql-scalars" } +serde_json = "1" diff --git a/crates/eql-types/README.md b/crates/eql-types/README.md new file mode 100644 index 00000000..f3256e16 --- /dev/null +++ b/crates/eql-types/README.md @@ -0,0 +1,82 @@ +# eql-types + +Canonical wire types for EQL payloads — **one Rust definition per payload +shape**, the single source of truth for every tool that produces or consumes +EQL payloads (`cipherstash-client`, `protect-ffi`, CipherStash Proxy). + +TypeScript bindings (via [`ts-rs`]) and JSON Schemas (via [`schemars`]) are +generated from these definitions in stacked changes; this crate is the +Rust contract only. + +## Why + +Type information is lost at every hop of `EQL → cipherstash-client → +protect-ffi → stack`. protect-ffi hand-writes its TypeScript types; they drift +from the Rust they describe; stack widens them further. The result is bugs +like the `protect-dynamodb` search-term check that validates a payload shape +EQL never actually defined. A generated, single-source crate removes the +hand-copying. + +## Capability-encoded types + +The [`src/v3/`](src/v3/) module has one type per **SQL domain** in the +`eql_v3` schema — `Int4` / `Int4Eq` / `Int4Ord` / `Int4OrdOre`, and likewise +for `int2`, `int8`, `date`, `timestamptz` (eq-only), and `text` (which adds +`TextMatch`) — each carrying its index terms as **required** fields. The +capability is the type identity; `Option` never appears. A payload missing +its term key fails to deserialize: the Rust analogue of the SQL domain's +CHECK constraint. 
+ +Shared wire fields are reusable newtypes in +[`src/v3/terms.rs`](src/v3/terms.rs): + +| Newtype | Wire key | Inner | Backs | +|---------|----------|-------|-------| +| `Ciphertext` | `c` | `String` | every domain (envelope) | +| `Hmac256` | `hm` | `String` | `_eq` domains | +| `OreBlockU64_8_256` | `ob` | `Vec` | `_ord` / `_ord_ore` domains | +| `BloomFilter` | `bf` | `Vec<i16>` (signed!) | `_match` domains | + +Note "v3" names the SQL schema generation (`eql_v3.*`); the JSON envelope +version is still `v: 2` — the generated domain CHECKs assert it, and the wire +field names are unchanged from v2 (the purpose-named rename in +`docs/plans/eql-payload-scheme-discipline-rfc.md` is deferred). + +## Drift protection + +`tests/catalog_parity.rs` asserts the domain inventory — +[`v3::all()`](src/v3/mod.rs), a `Vec<Box<dyn DomainType>>` of zero-sized +type-level handles — exactly covers `eql-scalars::CATALOG` (the same catalog +that generates the `eql_v3` SQL surface): every domain, in order. Adding a +scalar to the catalog without adding its types here fails the build. +Wire-key strictness (required term keys, unknown-key rejection, envelope +version) is covered per-type in `tests/v3_conformance.rs` and pinned against +the catalog by the JSON Schema parity test in the stacked schemars change. + +## Develop + +```sh +cargo test -p eql-types +``` + +The crate is also part of the lean `mise run test:crates` set (fmt, clippy, +test — no database). + +## Future direction: self-describing payloads + +On the wire, a v3 payload is discriminated only by *which key is present* +(`hm` vs `ob` vs `bf`) — the SQL domain name carries the rest. Once the JSON +leaves SQL (into protect-ffi, into TypeScript, into a log line) that +information is gone, and a consumer is back to sniffing keys: the untagged +failure mode that produced the original protect-dynamodb bug.
An earlier +prototype here carried an `Int4Tagged` enum with a one-field capability tag +(`"x": "int4_eq"`), which generates a clean TypeScript discriminated union +and a JSON Schema `oneOf` with per-branch `const`s. It was removed because +the tag is not part of the v3 wire contract (the generated domain CHECKs +know no `x` key) — but it remains the recommended shape if a future payload +revision adds a discriminator. See +`docs/plans/eql-payload-scheme-discipline-rfc.md` for the wider payload +evolution plan. + +[`ts-rs`]: https://github.com/Aleph-Alpha/ts-rs +[`schemars`]: https://graham.cool/schemars/ diff --git a/crates/eql-types/src/lib.rs b/crates/eql-types/src/lib.rs new file mode 100644 index 00000000..9232bdd9 --- /dev/null +++ b/crates/eql-types/src/lib.rs @@ -0,0 +1,78 @@ +//! # eql-types — canonical EQL payload types +//! +//! One Rust definition per EQL payload shape — the single source of truth +//! for every tool that produces or consumes EQL payloads +//! (`cipherstash-client`, `protect-ffi`, CipherStash Proxy). TypeScript +//! bindings and JSON Schemas are generated from these definitions in +//! stacked changes; the Rust types are the contract. +//! +//! The [`v3`] module holds the `eql_v3` encrypted-domain types: one struct +//! per SQL domain (`eql_v3.int4_eq`, `eql_v3.text_match`, …), +//! *capability-encoded* — index terms are required fields, never `Option`. +//! It mirrors `eql-scalars::CATALOG` 1:1, enforced by +//! `tests/catalog_parity.rs`. +//! +//! Wire rule: **field names ARE wire names** — no `#[serde(rename)]` +//! anywhere. The struct definition reads exactly like the JSON payload. + +use serde::{Deserialize, Serialize}; + +pub mod v3; + +/// EQL wire-format version. Hard-coded to `2` for every payload — including +/// the [`v3`] tier, whose generated domain CHECKs assert `VALUE->>'v' = '2'`. +pub const EQL_SCHEMA_VERSION: u16 = 2; + +/// The envelope version field (`v`) — always exactly [`EQL_SCHEMA_VERSION`] +/// on the wire. 
+/// +/// Deserialization rejects any other value: the Rust analogue of the domain +/// CHECK's `VALUE->>'v' = '2'`, so a wrong-version payload fails at the type +/// boundary instead of at INSERT. The inner value is private; the only +/// constructible instance is the current version. +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize)] +pub struct SchemaVersion(u16); + +impl SchemaVersion { + /// The current (only) wire version, `2`. + pub const CURRENT: Self = Self(EQL_SCHEMA_VERSION); + + /// The wire value. + pub const fn get(self) -> u16 { + self.0 + } +} + +impl Default for SchemaVersion { + fn default() -> Self { + Self::CURRENT + } +} + +impl<'de> Deserialize<'de> for SchemaVersion { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let v = u16::deserialize(deserializer)?; + if v == EQL_SCHEMA_VERSION { + Ok(Self(v)) + } else { + Err(serde::de::Error::custom(format!( + "unsupported EQL schema version {v} (expected {EQL_SCHEMA_VERSION})" + ))) + } + } +} + +/// Table + column identifier — wire shape `{"t": "...", "c": "..."}`. +/// +/// Shared by every payload. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Identifier { + /// Table name. + pub t: String, + /// Column name. + pub c: String, +} diff --git a/crates/eql-types/src/v3/date.rs b/crates/eql-types/src/v3/date.rs new file mode 100644 index 00000000..00cb706a --- /dev/null +++ b/crates/eql-types/src/v3/date.rs @@ -0,0 +1,107 @@ +//! The `date` encrypted-domain family — an ordered, non-integer scalar. +//! Same four-domain ordered shape as [`crate::v3::int4`] (ORE compares +//! ciphertext, so dates order like integers); see that module for the +//! capability table.
+ +use crate::v3::terms::{Ciphertext, Hmac256, OreBlockU64_8_256}; +use crate::v3::DomainType; +use crate::{Identifier, SchemaVersion}; +use serde::{Deserialize, Serialize}; + +/// `eql_v3.date` — storage only; every operator is blocked. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Date { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, +} + +impl DomainType for Date { + fn sql_domain_static() -> &'static str { + "eql_v3.date" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.date_eq` — HMAC equality (`=`, `<>`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct DateEq { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// HMAC-SHA-256 equality term. + pub hm: Hmac256, +} + +impl DomainType for DateEq { + fn sql_domain_static() -> &'static str { + "eql_v3.date_eq" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.date_ord_ore` — full comparison, scheme-explicit name. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct DateOrdOre { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. 
+ pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for DateOrdOre { + fn sql_domain_static() -> &'static str { + "eql_v3.date_ord_ore" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.date_ord` — full comparison (`=` `<>` `<` `<=` `>` `>=`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct DateOrd { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for DateOrd { + fn sql_domain_static() -> &'static str { + "eql_v3.date_ord" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} diff --git a/crates/eql-types/src/v3/int2.rs b/crates/eql-types/src/v3/int2.rs new file mode 100644 index 00000000..b641408d --- /dev/null +++ b/crates/eql-types/src/v3/int2.rs @@ -0,0 +1,105 @@ +//! The `int2` encrypted-domain family. Same four-domain ordered shape as +//! [`crate::v3::int4`] — see that module for the capability table. + +use crate::v3::terms::{Ciphertext, Hmac256, OreBlockU64_8_256}; +use crate::v3::DomainType; +use crate::{Identifier, SchemaVersion}; +use serde::{Deserialize, Serialize}; + +/// `eql_v3.int2` — storage only; every operator is blocked. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int2 { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. 
+ pub c: Ciphertext, +} + +impl DomainType for Int2 { + fn sql_domain_static() -> &'static str { + "eql_v3.int2" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int2_eq` — HMAC equality (`=`, `<>`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int2Eq { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// HMAC-SHA-256 equality term. + pub hm: Hmac256, +} + +impl DomainType for Int2Eq { + fn sql_domain_static() -> &'static str { + "eql_v3.int2_eq" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int2_ord_ore` — full comparison, scheme-explicit name. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int2OrdOre { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for Int2OrdOre { + fn sql_domain_static() -> &'static str { + "eql_v3.int2_ord_ore" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int2_ord` — full comparison (`=` `<>` `<` `<=` `>` `>=`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int2Ord { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. 
Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for Int2Ord { + fn sql_domain_static() -> &'static str { + "eql_v3.int2_ord" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} diff --git a/crates/eql-types/src/v3/int4.rs b/crates/eql-types/src/v3/int4.rs new file mode 100644 index 00000000..44dbcf34 --- /dev/null +++ b/crates/eql-types/src/v3/int4.rs @@ -0,0 +1,113 @@ +//! The `int4` encrypted-domain family — the reference scalar. +//! +//! | Rust type | SQL domain | Required keys | Operators | +//! |----------------|------------------------|---------------|----------------------------| +//! | [`Int4`] | `eql_v3.int4` | `v` `i` `c` | none (storage only) | +//! | [`Int4Eq`] | `eql_v3.int4_eq` | `v` `i` `c` `hm` | `=` `<>` | +//! | [`Int4OrdOre`] | `eql_v3.int4_ord_ore` | `v` `i` `c` `ob` | `=` `<>` `<` `<=` `>` `>=` | +//! | [`Int4Ord`] | `eql_v3.int4_ord` | `v` `i` `c` `ob` | `=` `<>` `<` `<=` `>` `>=` | + +use crate::v3::terms::{Ciphertext, Hmac256, OreBlockU64_8_256}; +use crate::v3::DomainType; +use crate::{Identifier, SchemaVersion}; +use serde::{Deserialize, Serialize}; + +/// `eql_v3.int4` — storage only; every operator is blocked. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int4 { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. 
+ pub c: Ciphertext, +} + +impl DomainType for Int4 { + fn sql_domain_static() -> &'static str { + "eql_v3.int4" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int4_eq` — HMAC equality (`=`, `<>`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int4Eq { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// HMAC-SHA-256 equality term. + pub hm: Hmac256, +} + +impl DomainType for Int4Eq { + fn sql_domain_static() -> &'static str { + "eql_v3.int4_eq" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int4_ord_ore` — full comparison (`=` `<>` `<` `<=` `>` `>=`), +/// scheme-explicit name. Same shape as [`Int4Ord`], distinct SQL domain. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int4OrdOre { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too — ORE over a + /// full-domain `int4` is lossless, so no separate `hm` is carried. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for Int4OrdOre { + fn sql_domain_static() -> &'static str { + "eql_v3.int4_ord_ore" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int4_ord` — full comparison (`=` `<>` `<` `<=` `>` `>=`). 
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int4Ord { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for Int4Ord { + fn sql_domain_static() -> &'static str { + "eql_v3.int4_ord" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} diff --git a/crates/eql-types/src/v3/int8.rs b/crates/eql-types/src/v3/int8.rs new file mode 100644 index 00000000..4ab0a232 --- /dev/null +++ b/crates/eql-types/src/v3/int8.rs @@ -0,0 +1,105 @@ +//! The `int8` encrypted-domain family. Same four-domain ordered shape as +//! [`crate::v3::int4`] — see that module for the capability table. + +use crate::v3::terms::{Ciphertext, Hmac256, OreBlockU64_8_256}; +use crate::v3::DomainType; +use crate::{Identifier, SchemaVersion}; +use serde::{Deserialize, Serialize}; + +/// `eql_v3.int8` — storage only; every operator is blocked. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int8 { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, +} + +impl DomainType for Int8 { + fn sql_domain_static() -> &'static str { + "eql_v3.int8" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int8_eq` — HMAC equality (`=`, `<>`). 
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int8Eq { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// HMAC-SHA-256 equality term. + pub hm: Hmac256, +} + +impl DomainType for Int8Eq { + fn sql_domain_static() -> &'static str { + "eql_v3.int8_eq" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int8_ord_ore` — full comparison, scheme-explicit name. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int8OrdOre { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for Int8OrdOre { + fn sql_domain_static() -> &'static str { + "eql_v3.int8_ord_ore" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.int8_ord` — full comparison (`=` `<>` `<` `<=` `>` `>=`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Int8Ord { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. 
+ pub ob: OreBlockU64_8_256, +} + +impl DomainType for Int8Ord { + fn sql_domain_static() -> &'static str { + "eql_v3.int8_ord" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} diff --git a/crates/eql-types/src/v3/mod.rs b/crates/eql-types/src/v3/mod.rs new file mode 100644 index 00000000..97262778 --- /dev/null +++ b/crates/eql-types/src/v3/mod.rs @@ -0,0 +1,139 @@ +//! # `eql_v3` domain payload types +//! +//! One Rust struct per **SQL domain** in the `eql_v3` schema — the +//! capability-encoded design from the original `eql_v2_int4` prototype +//! (PR #236's first cut), formalized: +//! the SQL surface is generated from `eql-scalars::CATALOG`, and these types +//! mirror it 1:1 (enforced by `tests/catalog_parity.rs`, which fails if the +//! catalog and [`all`] ever disagree on the set or order of domains; the +//! catalog-derived wire-key gate is schema-based and lands with the stacked +//! schemars change, with per-type strictness spot checks in +//! `tests/v3_conformance.rs`). +//! +//! **Versioning.** "v3" is the SQL schema generation (`eql_v3.*` domains). +//! The JSON envelope version is still `v: 2` ([`crate::EQL_SCHEMA_VERSION`]) — +//! every generated domain CHECK asserts `VALUE->>'v' = '2'`, and the wire +//! field names are unchanged from v2 (`hm`/`ob`/`bf`; the purpose-named +//! rename in the payload-scheme-discipline RFC is deferred). +//! +//! ## Shape of every payload +//! +//! Envelope (required by every domain CHECK, mirroring `ENVELOPE_KEYS` in +//! `eql-codegen/src/consts.rs`): `v`, `i`, `c`. Then the domain's required +//! term keys — `hm` for `_eq`, `ob` for `_ord`/`_ord_ore`, `bf` for +//! `_match`, none for storage-only. `Option` does not appear in this +//! module: the capability **is** the type identity. Hold a +//! [`int4::Int4Eq`] and `hm` is present, guaranteed by the Rust type and +//! (SQL-side) by the domain CHECK. A missing term key is a deserialization +//! 
error — the Rust analogue of the CHECK constraint. +//! +//! The types are also **strict**: every struct is +//! `#[serde(deny_unknown_fields)]`, so a payload carrying keys outside the +//! domain's set fails to deserialize rather than being silently stripped on +//! the next serialize (a pass-through consumer must not lose data it didn't +//! know about), and the `v` field is [`crate::SchemaVersion`], which rejects +//! any version other than `2`. +//! +//! ## Why there is no discriminated enum +//! +//! Cross-token: impossible — an `int4_eq` and an `int8_eq` payload are +//! byte-identical on the wire (`v`/`i`/`c`/`hm`); nothing discriminates them. +//! Per-token: deliberately omitted — an untagged enum over a token's domains +//! would discriminate by key-sniffing, the exact `v2_3::SteVecTerm` failure +//! mode this tier exists to retire, and `_ord` vs `_ord_ore` are identical +//! shapes that no sniffing can separate. Consumers read from a typed column +//! and already know the domain. + +use std::marker::PhantomData; + +pub mod date; +pub mod int2; +pub mod int4; +pub mod int8; +pub mod terms; +pub mod text; +pub mod timestamptz; + +/// The PostgreSQL schema every domain in this module inhabits. +pub const SQL_SCHEMA: &str = "eql_v3"; + +/// One v3 domain type — implemented by every payload type, so any payload +/// value can report the SQL domain it inhabits (`payload.sql_domain()`). +/// +/// Each token file implements this next to the type it describes; the SQL +/// domain string is defined exactly once, in that impl, and +/// `tests/catalog_parity.rs` cross-checks every entry of [`all`] against +/// `eql-scalars::CATALOG` — a typo'd or mis-ordered domain fails there. +/// Public so FFI consumers can enumerate the protocol surface too. +pub trait DomainType { + /// Fully-qualified SQL domain name, e.g. `"eql_v3.int4_eq"` — the + /// per-type fact everything else derives from, defined once in each + /// type's impl. 
+ /// + /// `where Self: Sized` keeps the trait object-safe (the method is + /// excluded from the vtable); through `dyn DomainType`, use + /// [`Self::sql_domain`]. + fn sql_domain_static() -> &'static str + where + Self: Sized; + + /// Fully-qualified SQL domain name of this payload value. + fn sql_domain(&self) -> &'static str; + + /// Unqualified SQL domain name (e.g. `"int4_eq"`) — [`Self::sql_domain`] + /// minus the schema qualifier; matches `eql-scalars` + /// `ScalarSpec::domain_name`. + fn domain(&self) -> &'static str { + self.sql_domain() + .strip_prefix("eql_v3.") + .expect("sql_domain must be qualified with the eql_v3 schema") + } +} + +/// Type-level handle: lets [`all`] enumerate the domain types without +/// payload values to box — `Box::new(PhantomData::<T>)` is zero-sized, +/// and the delegation goes through [`DomainType::sql_domain_static`], so no +/// payload instance is ever constructed. +impl<T> DomainType for PhantomData<T> +where + T: DomainType, +{ + fn sql_domain_static() -> &'static str { + T::sql_domain_static() + } + + fn sql_domain(&self) -> &'static str { + T::sql_domain_static() + } +} + +/// Every v3 domain type, in `eql-scalars::CATALOG` order (token order, then +/// each token's domains in manifest order) — the one hand-maintained list of +/// types in the crate.
+pub fn all() -> Vec> { + vec![ + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + Box::new(PhantomData::), + ] +} diff --git a/crates/eql-types/src/v3/terms.rs b/crates/eql-types/src/v3/terms.rs new file mode 100644 index 00000000..ddad74bf --- /dev/null +++ b/crates/eql-types/src/v3/terms.rs @@ -0,0 +1,64 @@ +//! Reusable wire-field newtypes shared by every v3 domain payload. +//! +//! Each newtype serializes as its inner value (serde's newtype-struct +//! default), so the wire shape is unchanged — but the *name* survives into +//! generated artifacts: the TypeScript bindings and JSON Schemas (added in +//! stacked changes) emit these as named aliases/definitions that every +//! domain type references. A plain Rust `type` alias would vanish there. +//! +//! Names follow the SEM constructor names in `eql-scalars` (`Term::ctor()`): +//! a future scheme change (e.g. a 12-block wide ORE term for timestamptz +//! ordering) is a new newtype, not a hunt through `Vec` fields. + +use serde::{Deserialize, Serialize}; + +/// mp_base85 source ciphertext — the `c` envelope key. +/// +/// Required by every v3 domain CHECK; present on every payload. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Ciphertext(pub String); + +/// HMAC-SHA-256 equality term — the `hm` wire key. Backs the `_eq` domains +/// (`=`, `<>`). SQL-side constructor: `eql_v3.hmac_256`. 
+#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct Hmac256(pub String); + +/// Block-ORE (u64, 8 blocks, 256) order term — the `ob` wire key. Backs the +/// `_ord` / `_ord_ore` domains (`=` `<>` `<` `<=` `>` `>=`); ORE is lossless +/// over the scalar's domain, so it serves equality too. SQL-side constructor: +/// `eql_v3.ore_block_u64_8_256`. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct OreBlockU64_8_256(pub Vec); + +/// Bloom-filter match term — the `bf` wire key. Backs the `_match` domains +/// (`~~` containment via `@>`/`<@`). +/// +/// **Signed** i16, not u16: EQL stores the filter as PostgreSQL `smallint[]`, +/// and filters sized above 32768 emit upper-half bit positions as negative +/// signed values. +#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)] +pub struct BloomFilter(pub Vec); + +impl From for Ciphertext { + fn from(value: String) -> Self { + Self(value) + } +} + +impl From for Hmac256 { + fn from(value: String) -> Self { + Self(value) + } +} + +impl From> for OreBlockU64_8_256 { + fn from(value: Vec) -> Self { + Self(value) + } +} + +impl From> for BloomFilter { + fn from(value: Vec) -> Self { + Self(value) + } +} diff --git a/crates/eql-types/src/v3/text.rs b/crates/eql-types/src/v3/text.rs new file mode 100644 index 00000000..9e11fc4d --- /dev/null +++ b/crates/eql-types/src/v3/text.rs @@ -0,0 +1,133 @@ +//! The `text` encrypted-domain family — the ordered shape of +//! [`crate::v3::int4`] plus a `_match` domain backed by the Bloom-filter +//! term (`@>`/`<@` containment for `LIKE`-style matching). + +use crate::v3::terms::{BloomFilter, Ciphertext, Hmac256, OreBlockU64_8_256}; +use crate::v3::DomainType; +use crate::{Identifier, SchemaVersion}; +use serde::{Deserialize, Serialize}; + +/// `eql_v3.text` — storage only; every operator is blocked. 
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Text { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, +} + +impl DomainType for Text { + fn sql_domain_static() -> &'static str { + "eql_v3.text" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.text_eq` — HMAC equality (`=`, `<>`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct TextEq { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// HMAC-SHA-256 equality term. + pub hm: Hmac256, +} + +impl DomainType for TextEq { + fn sql_domain_static() -> &'static str { + "eql_v3.text_eq" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.text_match` — Bloom-filter containment match. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct TextMatch { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Bloom-filter match term (signed smallint bit positions). 
+ pub bf: BloomFilter, +} + +impl DomainType for TextMatch { + fn sql_domain_static() -> &'static str { + "eql_v3.text_match" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.text_ord_ore` — full lexicographic comparison, +/// scheme-explicit name. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct TextOrdOre { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for TextOrdOre { + fn sql_domain_static() -> &'static str { + "eql_v3.text_ord_ore" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.text_ord` — full lexicographic comparison +/// (`=` `<>` `<` `<=` `>` `>=`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct TextOrd { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// Block-ORE order term. Serves equality too. + pub ob: OreBlockU64_8_256, +} + +impl DomainType for TextOrd { + fn sql_domain_static() -> &'static str { + "eql_v3.text_ord" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} diff --git a/crates/eql-types/src/v3/timestamptz.rs b/crates/eql-types/src/v3/timestamptz.rs new file mode 100644 index 00000000..6c4621b7 --- /dev/null +++ b/crates/eql-types/src/v3/timestamptz.rs @@ -0,0 +1,58 @@ +//! 
The `timestamptz` encrypted-domain family — **equality-only** (storage + +//! `_eq`). There is no ordered domain: cipherstash encrypts timestamps at +//! native 12-block ORE width, but EQL's only ORE comparator is hardcoded to +//! 8 blocks, so an ordered timestamptz domain would silently mis-order. +//! Ordering arrives with a future wide-ORE term (see `eql-scalars`). + +use crate::v3::terms::{Ciphertext, Hmac256}; +use crate::v3::DomainType; +use crate::{Identifier, SchemaVersion}; +use serde::{Deserialize, Serialize}; + +/// `eql_v3.timestamptz` — storage only; every operator is blocked. +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct Timestamptz { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, +} + +impl DomainType for Timestamptz { + fn sql_domain_static() -> &'static str { + "eql_v3.timestamptz" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} + +/// `eql_v3.timestamptz_eq` — HMAC equality (`=`, `<>`). +#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[serde(deny_unknown_fields)] +pub struct TimestamptzEq { + /// Envelope version — always `2` (`EQL_SCHEMA_VERSION`); any other + /// value fails deserialization. + pub v: SchemaVersion, + /// Table/column identifier. Required by the domain CHECK. + pub i: Identifier, + /// mp_base85 source ciphertext. Required by the domain CHECK. + pub c: Ciphertext, + /// HMAC-SHA-256 equality term. 
+ pub hm: Hmac256, +} + +impl DomainType for TimestamptzEq { + fn sql_domain_static() -> &'static str { + "eql_v3.timestamptz_eq" + } + + fn sql_domain(&self) -> &'static str { + Self::sql_domain_static() + } +} diff --git a/crates/eql-types/tests/catalog_parity.rs b/crates/eql-types/tests/catalog_parity.rs new file mode 100644 index 00000000..94ff8eb7 --- /dev/null +++ b/crates/eql-types/tests/catalog_parity.rs @@ -0,0 +1,25 @@ +//! The drift gate: the v3 domain inventory must mirror `eql-scalars::CATALOG` +//! — the same catalog that generates the `eql_v3` SQL surface — exactly: +//! every domain, in catalog order. Append a scalar to the catalog without +//! adding its types (and their `all()` entries) and this fails. +//! +//! Wire-key strictness (required term keys, unknown-key rejection, envelope +//! version) is covered behaviourally per-type in `tests/v3_conformance.rs`, +//! and pinned against the catalog by the JSON Schema parity test in the +//! stacked schemars change. + +use eql_scalars::CATALOG; +use eql_types::v3; + +#[test] +fn inventory_exactly_covers_catalog() { + let expected: Vec = CATALOG + .iter() + .flat_map(|spec| spec.domains.iter().map(|d| spec.domain_name(d))) + .collect(); + let actual: Vec<&str> = v3::all().iter().map(|e| e.domain()).collect(); + assert_eq!( + actual, expected, + "v3::all() must list every CATALOG domain, in catalog order" + ); +} diff --git a/crates/eql-types/tests/v3_conformance.rs b/crates/eql-types/tests/v3_conformance.rs new file mode 100644 index 00000000..d8eecb54 --- /dev/null +++ b/crates/eql-types/tests/v3_conformance.rs @@ -0,0 +1,251 @@ +//! Conformance for the v3 tier: explicit, readable tests for the reference +//! token (`int4`), the term shapes it doesn't carry, and a round-trip sweep +//! of every other token. The catalog-driven inventory check (every domain +//! name, in catalog order) lives in `catalog_parity.rs`.
+ +use eql_types::v3::int4::{Int4, Int4Eq, Int4Ord, Int4OrdOre}; +use eql_types::v3::text::TextMatch; +use eql_types::v3::DomainType; +use serde_json::json; + +#[test] +fn int4_storage_round_trips() { + let wire = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext" + }); + let parsed: Int4 = serde_json::from_value(wire.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), wire); + assert_eq!(Int4::sql_domain_static(), "eql_v3.int4"); +} + +#[test] +fn int4_eq_round_trips() { + let wire = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext", + "hm": "deadbeef" + }); + let parsed: Int4Eq = serde_json::from_value(wire.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), wire); + assert_eq!(Int4Eq::sql_domain_static(), "eql_v3.int4_eq"); +} + +#[test] +fn int4_ord_round_trips() { + let wire = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext", + "ob": ["ore_block_0", "ore_block_1"] + }); + let parsed: Int4Ord = serde_json::from_value(wire.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), wire); + // `_ord_ore` is the same shape under the scheme-explicit domain name. + let parsed: Int4OrdOre = serde_json::from_value(wire.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), wire); + assert_eq!(Int4OrdOre::sql_domain_static(), "eql_v3.int4_ord_ore"); +} + +#[test] +fn int4_eq_rejects_missing_hmac() { + // The capability is type-enforced: an `int4_eq` payload with no `hm` is + // not representable. This is the bug class — a search term missing its + // index term — closed at the type boundary, before any consumer runs. 
+ let no_hm = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext" + }); + let result: Result = serde_json::from_value(no_hm); + assert!(result.is_err(), "Int4Eq must reject a payload with no hm"); +} + +#[test] +fn rejects_missing_envelope_keys() { + // v/i/c are the shared envelope contract every domain CHECK asserts. The + // missing-term negatives cover hm/ob/bf; these cover the envelope itself — + // dropping the version, identifier, or ciphertext fails at the type + // boundary, the Rust analogue of the CHECK's required envelope keys. + let base = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext", + "hm": "deadbeef" + }); + for key in ["v", "i", "c"] { + let mut wire = base.clone(); + wire.as_object_mut().unwrap().remove(key); + let result: Result = serde_json::from_value(wire); + assert!( + result.is_err(), + "Int4Eq must reject a payload with no {key}" + ); + } +} + +#[test] +fn rejects_wrong_envelope_version() { + // The SchemaVersion field is the Rust analogue of the domain CHECK's + // `VALUE->>'v' = '2'`: any other version — including a string "2", + // which the CHECK's `->>` coercion would accept — fails at the type + // boundary instead of at INSERT. + for v in [json!(1), json!(3), json!("2")] { + let wire = json!({ + "v": v, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext", + "hm": "deadbeef" + }); + let result: Result = serde_json::from_value(wire); + assert!(result.is_err(), "Int4Eq must reject v = {v}"); + } +} + +#[test] +fn rejects_unknown_keys() { + // deny_unknown_fields: a payload carrying keys outside the domain's set + // is not silently accepted-and-stripped — a pass-through consumer must + // not lose data it didn't know about.
+ let wire = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext", + "hm": "deadbeef", + "ob": ["ore_block_0"] + }); + let result: Result = serde_json::from_value(wire); + assert!( + result.is_err(), + "Int4Eq must reject a payload carrying keys beyond its domain (here: ob)" + ); +} + +#[test] +fn int4_ord_rejects_missing_ore_term() { + let no_ob = json!({ + "v": 2, + "i": { "t": "users", "c": "age" }, + "c": "mp_base85_ciphertext", + "hm": "deadbeef" + }); + let result: Result = serde_json::from_value(no_ob); + assert!(result.is_err(), "Int4Ord must reject a payload with no ob"); +} + +#[test] +fn text_match_round_trips_signed_bloom_filter() { + // `bf` is signed i16 (smallint[]): filters sized above 32768 emit + // upper-half bit positions as negative values. + let wire = json!({ + "v": 2, + "i": { "t": "users", "c": "email" }, + "c": "mp_base85_ciphertext", + "bf": [-1, -32768, 32767, 0] + }); + let parsed: TextMatch = serde_json::from_value(wire.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), wire); + + let no_bf = json!({ + "v": 2, + "i": { "t": "users", "c": "email" }, + "c": "mp_base85_ciphertext" + }); + let result: Result = serde_json::from_value(no_bf); + assert!( + result.is_err(), + "TextMatch must reject a payload with no bf" + ); +} + +#[test] +fn non_int4_tokens_round_trip_every_domain() { + // int4 is exercised exhaustively above; the other ordered tokens carry the + // *same* wire field names but were serialized by no test, so a copy-paste + // field typo (e.g. `hm` -> `hmm` in `int8.rs`) would ship green — + // `catalog_parity.rs` checks domain *names* only, never the wire shape. + // This sweep roundtrips every non-int4 domain and pins its catalog name, + // failing the instant a token drifts from the shared envelope/term contract. + use eql_types::v3::{date::*, int2::*, int8::*, text::*}; + + // Wire builders for the three shapes the ordered tokens share. 
+ let storage = |t: &str| json!({ "v": 2, "i": { "t": t, "c": "x" }, "c": "ct" }); + let eq = |t: &str| json!({ "v": 2, "i": { "t": t, "c": "x" }, "c": "ct", "hm": "deadbeef" }); + let ord = |t: &str| json!({ "v": 2, "i": { "t": t, "c": "x" }, "c": "ct", "ob": ["b0", "b1"] }); + + // Roundtrip a payload byte-for-byte, then confirm the catalog domain name. + macro_rules! round_trip { + ($ty:ty, $wire:expr, $domain:expr) => {{ + let wire = $wire; + let parsed: $ty = serde_json::from_value(wire.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), wire); + assert_eq!(<$ty>::sql_domain_static(), $domain); + }}; + } + + round_trip!(Int2, storage("a"), "eql_v3.int2"); + round_trip!(Int2Eq, eq("a"), "eql_v3.int2_eq"); + round_trip!(Int2Ord, ord("a"), "eql_v3.int2_ord"); + round_trip!(Int2OrdOre, ord("a"), "eql_v3.int2_ord_ore"); + + round_trip!(Int8, storage("a"), "eql_v3.int8"); + round_trip!(Int8Eq, eq("a"), "eql_v3.int8_eq"); + round_trip!(Int8Ord, ord("a"), "eql_v3.int8_ord"); + round_trip!(Int8OrdOre, ord("a"), "eql_v3.int8_ord_ore"); + + round_trip!(Date, storage("a"), "eql_v3.date"); + round_trip!(DateEq, eq("a"), "eql_v3.date_eq"); + round_trip!(DateOrd, ord("a"), "eql_v3.date_ord"); + round_trip!(DateOrdOre, ord("a"), "eql_v3.date_ord_ore"); + + // text_match is covered by `text_match_round_trips_signed_bloom_filter`. + round_trip!(Text, storage("a"), "eql_v3.text"); + round_trip!(TextEq, eq("a"), "eql_v3.text_eq"); + round_trip!(TextOrd, ord("a"), "eql_v3.text_ord"); + round_trip!(TextOrdOre, ord("a"), "eql_v3.text_ord_ore"); +} + +#[test] +fn timestamptz_round_trips_and_enforces_equality_term() { + // The one structurally-distinct token: equality-only, no `_ord`/`_ord_ore` + // (the 8-block-ORE limitation). The int4 template was copy-pasted to + // produce it, so an accidental extra `ob` field or a dropped `hm` would + // pass `catalog_parity` (domain names only) but is caught here. 
+ use eql_types::v3::timestamptz::{Timestamptz, TimestamptzEq}; + + // Storage-only: envelope, no term. + let storage = json!({ + "v": 2, + "i": { "t": "events", "c": "occurred_at" }, + "c": "mp_base85_ciphertext" + }); + let parsed: Timestamptz = serde_json::from_value(storage.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), storage); + assert_eq!(Timestamptz::sql_domain_static(), "eql_v3.timestamptz"); + + // Equality: envelope + hm. + let with_hm = json!({ + "v": 2, + "i": { "t": "events", "c": "occurred_at" }, + "c": "mp_base85_ciphertext", + "hm": "deadbeef" + }); + let parsed: TimestamptzEq = serde_json::from_value(with_hm.clone()).unwrap(); + assert_eq!(serde_json::to_value(&parsed).unwrap(), with_hm); + assert_eq!(TimestamptzEq::sql_domain_static(), "eql_v3.timestamptz_eq"); + + // `_eq` is the only searchable shape this token has, so its equality term + // cannot silently become optional. + let no_hm = json!({ + "v": 2, + "i": { "t": "events", "c": "occurred_at" }, + "c": "mp_base85_ciphertext" + }); + let result: Result = serde_json::from_value(no_hm); + assert!( + result.is_err(), + "TimestamptzEq must reject a payload with no hm" + ); +} diff --git a/docs/reference/schema/eql-payload-v2.3.schema.json b/docs/reference/schema/eql-payload-v2.3.schema.json index 0b1e5741..02df122a 100644 --- a/docs/reference/schema/eql-payload-v2.3.schema.json +++ b/docs/reference/schema/eql-payload-v2.3.schema.json @@ -184,9 +184,9 @@ "bf": { "title": "Bloom filter (bf)", - "description": "Bloom filter representation as an array of set bit positions. Used by `LIKE` / `ILIKE` (`~~`, `~~*`) via `eql_v2.bloom_filter` and the corresponding GIN index.", + "description": "Bloom filter representation as an array of set bit positions. Used by `LIKE` / `ILIKE` (`~~`, `~~*`) via `eql_v2.bloom_filter` and the corresponding GIN index. 
Stored as `smallint[]` (signed `int2`): the filter size is a power of two up to 65536, so positions in the upper half of a filter larger than 32768 are emitted as negative signed values (two's-complement of the unsigned position). Consumers must use a signed 16-bit integer type.", "type": "array", - "items": { "type": "integer", "minimum": 0 } + "items": { "type": "integer", "minimum": -32768, "maximum": 32767 } }, "ob": { diff --git a/mise.toml b/mise.toml index 7e34b78e..0b210278 100644 --- a/mise.toml +++ b/mise.toml @@ -119,11 +119,12 @@ description = "Compile, lint and test the std-only Rust workspace crates (no dat dir = "{{config_root}}" run = """ #!/usr/bin/env bash -# eql-scalars / eql-codegen / eql-tests-macros are the lean workspace members. -# Scope explicitly to them (NOT --workspace): a workspace-wide test would drag -# in tests/sqlx, whose suite needs Postgres + CS_* secrets and is already -# covered by the `test` job. eql-tests-macros only pulls syn/quote/proc-macro2, -# so it stays in the lean set. clippy is likewise scoped — a workspace clippy +# eql-scalars / eql-codegen / eql-tests-macros / eql-types are the lean +# workspace members. Scope explicitly to them (NOT --workspace): a +# workspace-wide test would drag in tests/sqlx, whose suite needs Postgres + +# CS_* secrets and is already covered by the `test` job. eql-tests-macros only +# pulls syn/quote/proc-macro2 and eql-types only serde/serde_json, so they +# stay in the lean set. clippy is likewise scoped — a workspace clippy # recompiles the heavy sqlx/tokio/cipherstash-client tree for no added coverage # of these crates. # bash is pinned via the `#!/usr/bin/env bash` shebang above (mise honors a @@ -131,8 +132,8 @@ run = """ # /bin/sh (dash on the CI images). 
set -euo pipefail cargo fmt --check -cargo clippy -p eql-scalars -p eql-codegen -p eql-tests-macros --all-targets -- -D warnings -cargo test -p eql-scalars -p eql-codegen -p eql-tests-macros +cargo clippy -p eql-scalars -p eql-codegen -p eql-tests-macros -p eql-types --all-targets -- -D warnings +cargo test -p eql-scalars -p eql-codegen -p eql-tests-macros -p eql-types """ [tasks."test:matrix:inventory"] diff --git a/tests/sqlx/src/scalar_domains.rs b/tests/sqlx/src/scalar_domains.rs index 25ad3605..55bf26c8 100644 --- a/tests/sqlx/src/scalar_domains.rs +++ b/tests/sqlx/src/scalar_domains.rs @@ -502,7 +502,10 @@ impl Variant { /// matrix `payload_check` arm iterates this to assert each key's /// absence is rejected at the cast. pub fn payload_required_keys(self) -> impl Iterator { - ["v", "i", "c"].into_iter().chain(self.required_term()) + eql_scalars::ENVELOPE_KEYS + .iter() + .copied() + .chain(self.required_term()) } pub const fn supports_eq(self) -> bool {