Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/eql-codegen/src/consts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pub(crate) const SCHEMA: &str = "eql_v3";
/// Three fixed single-letter keys: `"v"`, `"i"`, `"c"`.
/// NOTE(review): presumably the top-level keys of the encrypted payload
/// envelope — confirm against the code that consumes this const.
pub(crate) const ENVELOPE_KEYS: &[&str] = &["v", "i", "c"];

/// Escape a string for use inside a single-quoted SQL literal by doubling
/// embedded single quotes. Port of templates.py `_sql_str`.
/// embedded single quotes.
pub(crate) fn sql_str(s: &str) -> String {
    // Walk the input once, emitting two quotes for every single quote so the
    // result can sit inside a '...' SQL literal; all other characters pass
    // through untouched.
    let mut escaped = String::with_capacity(s.len());
    for ch in s.chars() {
        if ch == '\'' {
            escaped.push_str("''");
        } else {
            escaped.push(ch);
        }
    }
    escaped
}
Expand Down
2 changes: 1 addition & 1 deletion crates/eql-codegen/src/generate.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! File renderers and orchestrator (port of generate.py).
//! File renderers and orchestrator.

use std::path::{Path, PathBuf};

Expand Down
2 changes: 1 addition & 1 deletion crates/eql-codegen/src/operator_surface.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! The generated operator surface (port of operator_surface.py).
//! The generated operator surface.

/// One operator in the generated surface.
#[derive(Clone, Copy)]
Expand Down
2 changes: 1 addition & 1 deletion crates/eql-codegen/src/writer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
//! Ownership-guarded file writer (port of writer.py).
//! Ownership-guarded file writer.

use std::fs;
use std::io;
Expand Down
28 changes: 11 additions & 17 deletions crates/eql-scalars/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//! Scalar/term catalog for EQL encrypted-domain codegen — the Rust source of
//! truth replacing `tasks/codegen/{scalars,terms,spec}.py` and the
//! `types/*.toml` manifests. Std-only, no dependencies.
//! Scalar/term catalog for EQL encrypted-domain codegen — the single Rust
//! source of truth for every scalar type, term, and fixture. Std-only, no
//! dependencies.
//!
//! `Fixture` is value-kind tagged (one non-generic enum, variant = value kind),
//! so a single `CATALOG` spans every scalar kind. Integer literals are
Expand Down Expand Up @@ -151,9 +151,8 @@ impl ScalarKind {

/// A fixed index term known to the scalar materializer.
///
/// Mirrors `terms.py`'s `TERM_CATALOG`. `Hm` provides equality; `Ore` provides
/// equality plus ordering. The `json_key`/`extractor`/`returns`/`ctor` values
/// are the cross-schema SQL contract and are copied verbatim from `terms.py` —
/// `Hm` provides equality; `Ore` provides equality plus ordering. The
/// `json_key`/`extractor`/`ctor` values are the cross-schema SQL contract —
/// changing one is a generated-SQL behaviour change, not a refactor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Term {
Expand Down Expand Up @@ -215,8 +214,7 @@ impl Term {
}

impl Term {
/// Stable dedupe — first occurrence wins. The Rust analogue of
/// `terms.py`'s `dict.fromkeys` ordering contract.
/// Stable dedupe — first occurrence wins.
fn dedupe_preserving_order<'a>(items: impl IntoIterator<Item = &'a str>) -> Vec<&'a str> {
let mut out: Vec<&'a str> = Vec::new();
for item in items {
Expand All @@ -228,26 +226,23 @@ impl Term {
}

/// Supported operators for the union of a domain's terms (catalog order,
/// deduped). Mirrors `terms.py::operators_for_terms`.
/// deduped).
pub fn operators_for_terms(terms: &[Term]) -> Vec<&'static str> {
    // Concatenate each term's operator list in the order the terms were
    // given, then keep only the first occurrence of every operator.
    let all_operators = terms
        .iter()
        .flat_map(|term| term.operators().iter().copied());
    Self::dedupe_preserving_order(all_operators)
}

/// JSON payload keys required by these terms (deduped, in order).
/// Mirrors `terms.py::term_json_keys`.
pub fn term_json_keys(terms: &[Term]) -> Vec<&'static str> {
    // One JSON key per term, in input order; repeats are dropped so the
    // first occurrence fixes each key's position.
    let keys = terms.iter().map(|term| term.json_key());
    Self::dedupe_preserving_order(keys)
}

/// SQL `-- REQUIRE:` edges needed by these terms (deduped, in order).
/// Mirrors `terms.py::term_requires`.
pub fn term_requires(terms: &[Term]) -> Vec<&'static str> {
    // Gather every term's `requires()` entries in input order, then collapse
    // duplicates while keeping the first occurrence of each edge.
    let edges = terms
        .iter()
        .flat_map(|term| term.requires().iter().copied());
    Self::dedupe_preserving_order(edges)
}

/// The extractor that supports `op` for a domain carrying `terms`, or
/// `None`. First supporting term wins. Mirrors
/// `terms.py::extractor_for_operator`.
/// `None`. First supporting term wins.
pub fn extractor_for_operator(terms: &[Term], op: &str) -> Option<&'static str> {
terms
.iter()
Expand All @@ -256,8 +251,7 @@ impl Term {
}

/// Generated-file role label for a domain with these terms. No terms =>
/// `"storage"`; otherwise the first term's role. Mirrors
/// `terms.py::role_for_terms`.
/// `"storage"`; otherwise the first term's role.
pub fn role_for_terms(terms: &[Term]) -> &'static str {
match terms.first() {
None => "storage",
Expand Down Expand Up @@ -420,13 +414,13 @@ macro_rules! fixtures {
(date; $($s:literal),* $(,)?) => { &[$(Fixture::Date($s)),*] };
}

/// int4 fixture plaintexts — verbatim from `tasks/codegen/types/int4.toml`.
/// int4 fixture plaintexts.
/// `N(..)` literals are range-checked against `i32` at compile time.
/// Listed in ascending order: `Min` first, `Zero` between `N(-1)` and `N(1)`,
/// and `Max` last.
const INT4_FIXTURES: &[Fixture] = fixtures!(int i32;
Min, N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17), N(25),
N(42), N(50), N(100), N(250), N(1000), N(9999), Max);

/// int2 fixture plaintexts — verbatim from `tasks/codegen/types/int2.toml`.
/// int2 fixture plaintexts.
/// `N(..)` literals are range-checked against `i16` at compile time.
const INT2_FIXTURES: &[Fixture] = fixtures!(int i16;
Min, N(-30000), N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17),
Expand Down
38 changes: 22 additions & 16 deletions docs/reference/adding-a-scalar-encrypted-domain-type.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,9 @@ than a runtime validator:
- **`fixtures`** — the type's plaintext fixture list (see below).

**Terms** are fixed by the `Term` enum (`crates/eql-scalars/src/lib.rs`). The
`json_key` / `extractor` / `returns` / `ctor` values are the cross-schema SQL
contract — changing one is a generated-SQL behaviour change, not a refactor:
`json_key` / `extractor` / `ctor` values are the cross-schema SQL contract (the
Returns column below is `eql_v3.` + `ctor`) — changing one is a generated-SQL
behaviour change, not a refactor:

| Term | JSON key | Extractor | Returns | Operators |
| ----- | -------- | ----------- | -------------------------------- | -------------------------- |
Expand All @@ -122,7 +123,7 @@ contract — changing one is a generated-SQL behaviour change, not a refactor:

A type that needs a non-ORE equality term on an ordered domain needs a **new
`Term`**, not a catalog flag. Adding a term is a code change to the `Term`
enum's `impl` methods (`json_key`, `extractor`, `returns`, `ctor`, `role`,
enum's `impl` methods (`json_key`, `extractor`, `ctor`, `role`,
`operators`, `requires`) with matching `#[test]`s (`term_tests` /
`term_helper_tests`) — never a free-form catalog field.

Expand Down Expand Up @@ -166,7 +167,7 @@ matrix-pivot requirement:
- `every_fixture_value_is_within_kind_bounds` keeps every resolved value in
range.

These are the compile/test-time analogue of the old `load_spec` validation.
These run at compile/test time rather than at generation time.
Beyond the pivots, choose values so range operators produce distinguishable
result counts, include useful boundaries, and cover omitted-term negative cases.

Expand Down Expand Up @@ -204,8 +205,9 @@ jsonb-backed and token-driven):
method (not a `const`), and the comparison pivots come from
`ScalarType::min_pivot()` / `max_pivot()` (zero stays `Default::default()`).
Integer impls return `Self::MIN`/`Self::MAX` (emitted by the proc-macro);
temporal impls return explicit sentinel dates and are **hand-written** in
`scalar_domains.rs` (the macro emits only integer impls). `to_sql_literal` is
temporal impls return explicit sentinel dates and are emitted by the
`temporal_values!` declarative macro in `scalar_domains.rs` (the proc-macro
emits only integer impls). `to_sql_literal` is
overridden to single-quote the value (`'1970-01-01'`), since a bare `Display`
date is not a valid SQL literal.
- **The sqlx `chrono` feature.** The test crate enables sqlx's `chrono` feature
Expand All @@ -219,13 +221,13 @@ jsonb-backed and token-driven):
## 3. Wire the SQLx matrix oracle

The generated SQL is enough to *install* the domains, but the
`ordered_numeric_matrix!` suite only runs once the Rust harness knows about the
`scalar_matrix!` suite only runs once the Rust harness knows about the
scalar. `<R>` is the scalar's Rust type (`i32` for `int4`, `i16` for `int2`).
There are now **two** registrations:

| File | Add |
|------|-----|
| `tests/sqlx/src/scalar_types.rs` | One `<T> => <R>` line in the `scalar_types!` list (e.g. `int8 => i64,`). This single line drives the `impl ScalarType`, the `eql_v2_<T>` fixture module, the `ordered_numeric_matrix!` suite, and the `generate_for_token` arm — all generated by the `eql-tests-macros` proc-macros. |
| `tests/sqlx/src/scalar_types.rs` | One `<T> => <R>` line in the `scalar_types!` list (e.g. `int8 => i64,`). This single line drives the `impl ScalarType`, the `eql_v2_<T>` fixture module, the `scalar_matrix!` suite, and the `generate_for_token` arm — all generated by the `eql-tests-macros` proc-macros. |
| `tests/sqlx/src/fixtures/eql_plaintext.rs` | A sealed `EqlPlaintext` impl for `<R>`: `impl Sealed for <R> {}` and `impl EqlPlaintext for <R>` carrying just `const KIND: ScalarKind` plus the value-typed `to_plaintext` → the right `Plaintext` variant. `CAST` and `PLAINTEXT_SQL_TYPE` are **derived** from `KIND` via the `cast_for_kind` / `plaintext_sql_type_for_kind` `const fn` defaults, so a brand-new integer kind needs an arm in those two helpers — not a per-type const. Keep the three `#[test]`s (cast / sql-type / to_plaintext) mirroring the existing ones. |

The single `<T> => <R>` line in `scalar_types.rs` is the harness source of
Expand All @@ -243,13 +245,17 @@ type but the binary has no `scalars::<T>::` tests. A catalog token absent from
the `scalar_types!` list also fails the `generate_for_token` catch-all loudly
at fixture-generation time.

The coverage these registrations unlock comes from the `ordered_numeric_matrix!`
The coverage these registrations unlock comes from the `scalar_matrix!`
convention wrapper in `tests/sqlx/src/matrix.rs`: one `impl ScalarType` plus a
single invocation taking `suite`, `scalar`, and `eql_type`. The matrix derives
its comparison pivots — the scalar's `MIN`, `MAX`, and zero
(`Default::default()`) — from the type rather than a hand-written list, so the
invocation carries no pivot argument. Equality-only scalars use the sibling
`eq_only_scalar_matrix!`. The `matrix.rs` module header is the canonical,
single invocation taking `suite`, `scalar`, `eql_type`, and a `caps` capability
marker. The matrix derives its comparison pivots — the scalar's `MIN`, `MAX`,
and zero (`Default::default()`) — from the type rather than a hand-written list,
so the invocation carries no pivot argument. `caps = [eq, ord]` selects the
ordered-numeric shape (all four variants; `=`/`<>`/`<`/`<=`/`>`/`>=`; ORDER BY /
ORDER BY USING; ORE injectivity); `caps = [eq]` selects the equality-only shape
(storage + `_eq` only; the four ord operators are deliberate blockers). Both
expand to the lower-level `scalar_domain_matrix!`. The `matrix.rs` module header
is the canonical,
current list of the categories the matrix emits (sanity, correctness,
cross-shape, supported-NULL, blocker raises, index engagement, ORDER BY, ORDER
BY USING) — read it rather than duplicating a count here. For ordered `int4`,
Expand Down Expand Up @@ -314,7 +320,7 @@ the type-generic generator: the templates are pure token substitution, so a
per-type baseline can only fail where `int4`'s already would. Drift protection
for a new type comes from the `int4` reference (shared templates + `Term` enum),
the catalog `values_tests` pinning the materialised `<T>_VALUES`, the
catalog/generator `#[test]`s, and the `ordered_numeric_matrix!` SQLx suite
catalog/generator `#[test]`s, and the `scalar_matrix!` SQLx suite
(behaviour, not bytes).

---
Expand Down Expand Up @@ -544,7 +550,7 @@ runs as `cargo run -p eql-codegen` (no subcommand), which calls
and matrix-inventory enumeration). `main` (`crates/eql-codegen/src/main.rs`)
recognises exactly these two forms; any other argument is a usage error.

The generator targets the `eql_v3` schema throughout: `CORE_SCHEMA = "eql_v3"`
The generator targets the `eql_v3` schema throughout: `SCHEMA = "eql_v3"`
(`crates/eql-codegen/src/consts.rs`) qualifies both the domain families and the
SEM index-term types the extractors return (`eql_v3.hmac_256`,
`eql_v3.ore_block_u64_8_256`), so no generated SQL references `eql_v2`.
Expand Down
28 changes: 13 additions & 15 deletions tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@
//!
//! The storage-only domains (`eql_v3.int4`, future scalars) promise that
//! *every* native jsonb operator is blocked, so an encrypted column can never
//! fall through to plaintext-jsonb semantics. That promise rests on three
//! hand-maintained lists in `tasks/codegen/operator_surface.py`
//! (`SYMMETRIC_OPERATORS`, `PATH_OPERATORS`, `BLOCKER_ONLY_OPERATORS`), whose
//! union is `KNOWN_JSONB_OPERATORS`.
//! fall through to plaintext-jsonb semantics. That promise rests on the
//! enumerated operator surface in `crates/eql-codegen/src/operator_surface.rs`
//! (the `OPERATORS` const), mirrored locally below as `KNOWN_JSONB_OPERATORS`.
//!
//! That surface is an *enumeration*, not a structural guarantee: a future PG
//! version could add a jsonb operator that nobody adds here, and it would
Expand All @@ -16,18 +15,17 @@
//! e.g. `~~` / `~~*`) are excluded — they are not native and are unreachable
//! from a storage scalar domain.
//!
//! Source of truth: `tasks/codegen/operator_surface.py::KNOWN_JSONB_OPERATORS`
//! (asserted complete by `tasks/codegen/test_operator_surface.py`). The set
//! below is hardcoded — the lowest-friction bridge from a Python constant to a
//! Rust test — and must be kept in sync with that module. If you add an
//! operator there, add it here; the Python test pins the union so the two can
//! only drift in this file.
//! Source of truth: `crates/eql-codegen/src/operator_surface.rs` (the
//! `OPERATORS` const, pinned at 20 entries by its own unit tests). The set
//! below is a hardcoded mirror and must be kept in sync with that module. If
//! you add an operator there, add it here.

use anyhow::Result;
use sqlx::PgPool;

/// Mirror of `KNOWN_JSONB_OPERATORS` in
/// `tasks/codegen/operator_surface.py`. Keep in sync with that module.
/// Mirror of the enumerated operator surface in
/// `crates/eql-codegen/src/operator_surface.rs` (`OPERATORS`). Keep in sync
/// with that module.
const KNOWN_JSONB_OPERATORS: &[&str] = &[
// symmetric (supported wrappers)
"=", "<>", "<", "<=", ">", ">=", "@>", "<@", //
Expand Down Expand Up @@ -77,11 +75,11 @@ async fn every_native_jsonb_operator_is_known_to_the_generator(pool: PgPool) ->
assert!(
missing.is_empty(),
"PostgreSQL exposes jsonb operator(s) not enumerated in \
tasks/codegen/operator_surface.py (KNOWN_JSONB_OPERATORS): {missing:#?}. \
crates/eql-codegen/src/operator_surface.rs (OPERATORS): {missing:#?}. \
A storage-only encrypted domain would route these to native \
plaintext-jsonb semantics instead of an EQL blocker. Add each symbol \
to the appropriate list in operator_surface.py (and to the mirror in \
this test) and regenerate the SQL surface."
to OPERATORS in operator_surface.rs (and to the mirror in this test) \
and regenerate the SQL surface."
);

Ok(())
Expand Down