From 2465f43a270deb64c0310522cac996b87d986082 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 9 Jun 2026 10:00:10 +1000 Subject: [PATCH] docs(v3): sync scalar-domain guide with implementation; drop deleted-Python refs Cross-checked docs/reference/adding-a-scalar-encrypted-domain-type.md against the current eql-scalars / eql-codegen / sqlx harness and corrected stale claims: - Matrix macros: the two wrappers ordered_numeric_matrix! / eq_only_scalar_matrix! were unified into scalar_matrix! (caps = [eq] / [eq, ord] over scalar_domain_matrix!). - Term::returns is not a real method; the methods are json_key/extractor/ctor/ role/operators/requires (the Returns column is eql_v3. + ctor). - Schema const is SCHEMA, not CORE_SCHEMA. - Temporal impls are emitted by the temporal_values! declarative macro, not hand-written. Also removed every dangling reference to the deleted Python codegen toolchain (tasks/codegen/*.py, terms.py, templates.py, operator_surface.py, *.toml type manifests, load_spec) across eql-scalars, eql-codegen, and the jsonb operator surface guard test, re-pointing them at the Rust sources of truth. The CHANGELOG [Unreleased] entry that documents the removal is left intact. 
--- crates/eql-codegen/src/consts.rs | 2 +- crates/eql-codegen/src/generate.rs | 2 +- crates/eql-codegen/src/operator_surface.rs | 2 +- crates/eql-codegen/src/writer.rs | 2 +- crates/eql-scalars/src/lib.rs | 28 ++++++-------- .../adding-a-scalar-encrypted-domain-type.md | 38 +++++++++++-------- .../family/jsonb_operator_surface.rs | 28 +++++++------- 7 files changed, 50 insertions(+), 52 deletions(-) diff --git a/crates/eql-codegen/src/consts.rs b/crates/eql-codegen/src/consts.rs index 0f7a1ef7..9266882f 100644 --- a/crates/eql-codegen/src/consts.rs +++ b/crates/eql-codegen/src/consts.rs @@ -17,7 +17,7 @@ pub(crate) const SCHEMA: &str = "eql_v3"; pub(crate) const ENVELOPE_KEYS: &[&str] = &["v", "i", "c"]; /// Escape a string for use inside a single-quoted SQL literal by doubling -/// embedded single quotes. Port of templates.py `_sql_str`. +/// embedded single quotes. pub(crate) fn sql_str(s: &str) -> String { s.replace('\'', "''") } diff --git a/crates/eql-codegen/src/generate.rs b/crates/eql-codegen/src/generate.rs index 622ca634..f428e475 100644 --- a/crates/eql-codegen/src/generate.rs +++ b/crates/eql-codegen/src/generate.rs @@ -1,4 +1,4 @@ -//! File renderers and orchestrator (port of generate.py). +//! File renderers and orchestrator. use std::path::{Path, PathBuf}; diff --git a/crates/eql-codegen/src/operator_surface.rs b/crates/eql-codegen/src/operator_surface.rs index 38f1ec19..19237dde 100644 --- a/crates/eql-codegen/src/operator_surface.rs +++ b/crates/eql-codegen/src/operator_surface.rs @@ -1,4 +1,4 @@ -//! The generated operator surface (port of operator_surface.py). +//! The generated operator surface. /// One operator in the generated surface. #[derive(Clone, Copy)] diff --git a/crates/eql-codegen/src/writer.rs b/crates/eql-codegen/src/writer.rs index 4d310fc8..153bc8e6 100644 --- a/crates/eql-codegen/src/writer.rs +++ b/crates/eql-codegen/src/writer.rs @@ -1,4 +1,4 @@ -//! Ownership-guarded file writer (port of writer.py). +//! 
Ownership-guarded file writer. use std::fs; use std::io; diff --git a/crates/eql-scalars/src/lib.rs b/crates/eql-scalars/src/lib.rs index d8d3d7fb..a6b67f6b 100644 --- a/crates/eql-scalars/src/lib.rs +++ b/crates/eql-scalars/src/lib.rs @@ -1,6 +1,6 @@ -//! Scalar/term catalog for EQL encrypted-domain codegen — the Rust source of -//! truth replacing `tasks/codegen/{scalars,terms,spec}.py` and the -//! `types/*.toml` manifests. Std-only, no dependencies. +//! Scalar/term catalog for EQL encrypted-domain codegen — the single Rust +//! source of truth for every scalar type, term, and fixture. Std-only, no +//! dependencies. //! //! `Fixture` is value-kind tagged (one non-generic enum, variant = value kind), //! so a single `CATALOG` spans every scalar kind. Integer literals are @@ -151,9 +151,8 @@ impl ScalarKind { /// A fixed index term known to the scalar materializer. /// -/// Mirrors `terms.py`'s `TERM_CATALOG`. `Hm` provides equality; `Ore` provides -/// equality plus ordering. The `json_key`/`extractor`/`returns`/`ctor` values -/// are the cross-schema SQL contract and are copied verbatim from `terms.py` — +/// `Hm` provides equality; `Ore` provides equality plus ordering. The +/// `json_key`/`extractor`/`ctor` values are the cross-schema SQL contract — /// changing one is a generated-SQL behaviour change, not a refactor. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Term { @@ -215,8 +214,7 @@ impl Term { } impl Term { - /// Stable dedupe — first occurrence wins. The Rust analogue of - /// `terms.py`'s `dict.fromkeys` ordering contract. + /// Stable dedupe — first occurrence wins. fn dedupe_preserving_order<'a>(items: impl IntoIterator<Item = &'a str>) -> Vec<&'a str> { let mut out: Vec<&'a str> = Vec::new(); for item in items { @@ -228,26 +226,23 @@ impl Term { } /// Supported operators for the union of a domain's terms (catalog order, - /// deduped). Mirrors `terms.py::operators_for_terms`. + /// deduped). 
pub fn operators_for_terms(terms: &[Term]) -> Vec<&'static str> { Self::dedupe_preserving_order(terms.iter().flat_map(|t| t.operators().iter().copied())) } /// JSON payload keys required by these terms (deduped, in order). - /// Mirrors `terms.py::term_json_keys`. pub fn term_json_keys(terms: &[Term]) -> Vec<&'static str> { Self::dedupe_preserving_order(terms.iter().map(|t| t.json_key())) } /// SQL `-- REQUIRE:` edges needed by these terms (deduped, in order). - /// Mirrors `terms.py::term_requires`. pub fn term_requires(terms: &[Term]) -> Vec<&'static str> { Self::dedupe_preserving_order(terms.iter().flat_map(|t| t.requires().iter().copied())) } /// The extractor that supports `op` for a domain carrying `terms`, or - /// `None`. First supporting term wins. Mirrors - /// `terms.py::extractor_for_operator`. + /// `None`. First supporting term wins. pub fn extractor_for_operator(terms: &[Term], op: &str) -> Option<&'static str> { terms .iter() @@ -256,8 +251,7 @@ impl Term { } /// Generated-file role label for a domain with these terms. No terms => - /// `"storage"`; otherwise the first term's role. Mirrors - /// `terms.py::role_for_terms`. + /// `"storage"`; otherwise the first term's role. pub fn role_for_terms(terms: &[Term]) -> &'static str { match terms.first() { None => "storage", @@ -420,13 +414,13 @@ macro_rules! fixtures { (date; $($s:literal),* $(,)?) => { &[$(Fixture::Date($s)),*] }; } -/// int4 fixture plaintexts — verbatim from `tasks/codegen/types/int4.toml`. +/// int4 fixture plaintexts. /// `N(..)` literals are range-checked against `i32` at compile time. const INT4_FIXTURES: &[Fixture] = fixtures!(int i32; Min, N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17), N(25), N(42), N(50), N(100), N(250), N(1000), N(9999), Max); -/// int2 fixture plaintexts — verbatim from `tasks/codegen/types/int2.toml`. +/// int2 fixture plaintexts. /// `N(..)` literals are range-checked against `i16` at compile time. 
const INT2_FIXTURES: &[Fixture] = fixtures!(int i16; Min, N(-30000), N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17), diff --git a/docs/reference/adding-a-scalar-encrypted-domain-type.md b/docs/reference/adding-a-scalar-encrypted-domain-type.md index 2f95c20c..7cf73ccb 100644 --- a/docs/reference/adding-a-scalar-encrypted-domain-type.md +++ b/docs/reference/adding-a-scalar-encrypted-domain-type.md @@ -112,8 +112,9 @@ than a runtime validator: - **`fixtures`** — the type's plaintext fixture list (see below). **Terms** are fixed by the `Term` enum (`crates/eql-scalars/src/lib.rs`). The -`json_key` / `extractor` / `returns` / `ctor` values are the cross-schema SQL -contract — changing one is a generated-SQL behaviour change, not a refactor: +`json_key` / `extractor` / `ctor` values are the cross-schema SQL contract (the +Returns column below is `eql_v3.` + `ctor`) — changing one is a generated-SQL +behaviour change, not a refactor: | Term | JSON key | Extractor | Returns | Operators | | ----- | -------- | ----------- | -------------------------------- | -------------------------- | @@ -122,7 +123,7 @@ contract — changing one is a generated-SQL behaviour change, not a refactor: A type that needs a non-ORE equality term on an ordered domain needs a **new `Term`**, not a catalog flag. Adding a term is a code change to the `Term` -enum's `impl` methods (`json_key`, `extractor`, `returns`, `ctor`, `role`, +enum's `impl` methods (`json_key`, `extractor`, `ctor`, `role`, `operators`, `requires`) with matching `#[test]`s (`term_tests` / `term_helper_tests`) — never a free-form catalog field. @@ -166,7 +167,7 @@ matrix-pivot requirement: - `every_fixture_value_is_within_kind_bounds` keeps every resolved value in range. -These are the compile/test-time analogue of the old `load_spec` validation. +These run at compile/test time rather than at generation time. 
Beyond the pivots, choose values so range operators produce distinguishable result counts, include useful boundaries, and cover omitted-term negative cases. @@ -204,8 +205,9 @@ jsonb-backed and token-driven): method (not a `const`), and the comparison pivots come from `ScalarType::min_pivot()` / `max_pivot()` (zero stays `Default::default()`). Integer impls return `Self::MIN`/`Self::MAX` (emitted by the proc-macro); - temporal impls return explicit sentinel dates and are **hand-written** in - `scalar_domains.rs` (the macro emits only integer impls). `to_sql_literal` is + temporal impls return explicit sentinel dates and are emitted by the + `temporal_values!` declarative macro in `scalar_domains.rs` (the proc-macro + emits only integer impls). `to_sql_literal` is overridden to single-quote the value (`'1970-01-01'`), since a bare `Display` date is not a valid SQL literal. - **The sqlx `chrono` feature.** The test crate enables sqlx's `chrono` feature @@ -219,13 +221,13 @@ jsonb-backed and token-driven): ## 3. Wire the SQLx matrix oracle The generated SQL is enough to *install* the domains, but the -`ordered_numeric_matrix!` suite only runs once the Rust harness knows about the +`scalar_matrix!` suite only runs once the Rust harness knows about the scalar. `<T>` is the scalar's Rust type (`i32` for `int4`, `i16` for `int2`). There are now **two** registrations: | File | Add | |------|-----| -| `tests/sqlx/src/scalar_types.rs` | One `<token> => <T>` line in the `scalar_types!` list (e.g. `int8 => i64,`). This single line drives the `impl ScalarType`, the `eql_v2_<token>` fixture module, the `ordered_numeric_matrix!` suite, and the `generate_for_token` arm — all generated by the `eql-tests-macros` proc-macros. | +| `tests/sqlx/src/scalar_types.rs` | One `<token> => <T>` line in the `scalar_types!` list (e.g. `int8 => i64,`). 
This single line drives the `impl ScalarType`, the `eql_v2_<token>` fixture module, the `scalar_matrix!` suite, and the `generate_for_token` arm — all generated by the `eql-tests-macros` proc-macros. | | `tests/sqlx/src/fixtures/eql_plaintext.rs` | A sealed `EqlPlaintext` impl for `<T>`: `impl Sealed for <T> {}` and `impl EqlPlaintext for <T>` carrying just `const KIND: ScalarKind` plus the value-typed `to_plaintext` → the right `Plaintext` variant. `CAST` and `PLAINTEXT_SQL_TYPE` are **derived** from `KIND` via the `cast_for_kind` / `plaintext_sql_type_for_kind` `const fn` defaults, so a brand-new integer kind needs an arm in those two helpers — not a per-type const. Keep the three `#[test]`s (cast / sql-type / to_plaintext) mirroring the existing ones. | The single `<token> => <T>` line in `scalar_types.rs` is the harness source of @@ -243,13 +245,17 @@ type but the binary has no `scalars::<token>::` tests. A catalog token absent from the `scalar_types!` list also fails the `generate_for_token` catch-all loudly at fixture-generation time. -The coverage these registrations unlock comes from the `ordered_numeric_matrix!` +The coverage these registrations unlock comes from the `scalar_matrix!` convention wrapper in `tests/sqlx/src/matrix.rs`: one `impl ScalarType` plus a -single invocation taking `suite`, `scalar`, and `eql_type`. The matrix derives -its comparison pivots — the scalar's `MIN`, `MAX`, and zero -(`Default::default()`) — from the type rather than a hand-written list, so the -invocation carries no pivot argument. Equality-only scalars use the sibling -`eq_only_scalar_matrix!`. The `matrix.rs` module header is the canonical, +single invocation taking `suite`, `scalar`, `eql_type`, and a `caps` capability +marker. The matrix derives its comparison pivots — the scalar's `MIN`, `MAX`, +and zero (`Default::default()`) — from the type rather than a hand-written list, +so the invocation carries no pivot argument. 
`caps = [eq, ord]` selects the +ordered-numeric shape (all four variants; `=`/`<>`/`<`/`<=`/`>`/`>=`; ORDER BY / +ORDER BY USING; ORE injectivity); `caps = [eq]` selects the equality-only shape +(storage + `_eq` only; the four ord operators are deliberate blockers). Both +expand to the lower-level `scalar_domain_matrix!`. The `matrix.rs` module header +is the canonical, current list of the categories the matrix emits (sanity, correctness, cross-shape, supported-NULL, blocker raises, index engagement, ORDER BY, ORDER BY USING) — read it rather than duplicating a count here. For ordered `int4`, @@ -314,7 +320,7 @@ the type-generic generator: the templates are pure token substitution, so a per-type baseline can only fail where `int4`'s already would. Drift protection for a new type comes from the `int4` reference (shared templates + `Term` enum), the catalog `values_tests` pinning the materialised `_VALUES`, the -catalog/generator `#[test]`s, and the `ordered_numeric_matrix!` SQLx suite +catalog/generator `#[test]`s, and the `scalar_matrix!` SQLx suite (behaviour, not bytes). --- @@ -544,7 +550,7 @@ runs as `cargo run -p eql-codegen` (no subcommand), which calls and matrix-inventory enumeration). `main` (`crates/eql-codegen/src/main.rs`) recognises exactly these two forms; any other argument is a usage error. -The generator targets the `eql_v3` schema throughout: `CORE_SCHEMA = "eql_v3"` +The generator targets the `eql_v3` schema throughout: `SCHEMA = "eql_v3"` (`crates/eql-codegen/src/consts.rs`) qualifies both the domain families and the SEM index-term types the extractors return (`eql_v3.hmac_256`, `eql_v3.ore_block_u64_8_256`), so no generated SQL references `eql_v2`. 
diff --git a/tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs b/tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs index b10a848c..70abb64d 100644 --- a/tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs +++ b/tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs @@ -2,10 +2,9 @@ //! //! The storage-only domains (`eql_v3.int4`, future scalars) promise that //! *every* native jsonb operator is blocked, so an encrypted column can never -//! fall through to plaintext-jsonb semantics. That promise rests on three -//! hand-maintained lists in `tasks/codegen/operator_surface.py` -//! (`SYMMETRIC_OPERATORS`, `PATH_OPERATORS`, `BLOCKER_ONLY_OPERATORS`), whose -//! union is `KNOWN_JSONB_OPERATORS`. +//! fall through to plaintext-jsonb semantics. That promise rests on the +//! enumerated operator surface in `crates/eql-codegen/src/operator_surface.rs` +//! (the `OPERATORS` const), mirrored locally below as `KNOWN_JSONB_OPERATORS`. //! //! Those lists are an *enumeration*, not a structural guarantee: a future PG //! version could add a jsonb operator that nobody adds here, and it would @@ -16,18 +15,17 @@ //! e.g. `~~` / `~~*`) are excluded — they are not native and are unreachable //! from a storage scalar domain. //! -//! Source of truth: `tasks/codegen/operator_surface.py::KNOWN_JSONB_OPERATORS` -//! (asserted complete by `tasks/codegen/test_operator_surface.py`). The set -//! below is hardcoded — the lowest-friction bridge from a Python constant to a -//! Rust test — and must be kept in sync with that module. If you add an -//! operator there, add it here; the Python test pins the union so the two can -//! only drift in this file. +//! Source of truth: `crates/eql-codegen/src/operator_surface.rs` (the +//! `OPERATORS` const, pinned at 20 entries by its own unit tests). The set +//! below is a hardcoded mirror and must be kept in sync with that module. If +//! you add an operator there, add it here. 
use anyhow::Result; use sqlx::PgPool; -/// Mirror of `KNOWN_JSONB_OPERATORS` in -/// `tasks/codegen/operator_surface.py`. Keep in sync with that module. +/// Mirror of the enumerated operator surface in +/// `crates/eql-codegen/src/operator_surface.rs` (`OPERATORS`). Keep in sync +/// with that module. const KNOWN_JSONB_OPERATORS: &[&str] = &[ // symmetric (supported wrappers) "=", "<>", "<", "<=", ">", ">=", "@>", "<@", // @@ -77,11 +75,11 @@ async fn every_native_jsonb_operator_is_known_to_the_generator(pool: PgPool) -> assert!( missing.is_empty(), "PostgreSQL exposes jsonb operator(s) not enumerated in \ - tasks/codegen/operator_surface.py (KNOWN_JSONB_OPERATORS): {missing:#?}. \ + crates/eql-codegen/src/operator_surface.rs (OPERATORS): {missing:#?}. \ A storage-only encrypted domain would route these to native \ plaintext-jsonb semantics instead of an EQL blocker. Add each symbol \ - to the appropriate list in operator_surface.py (and to the mirror in \ - this test) and regenerate the SQL surface." + to OPERATORS in operator_surface.rs (and to the mirror in this test) \ + and regenerate the SQL surface." ); Ok(())