diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9575fd16b4..f38df090d8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -768,6 +768,8 @@ jobs: graphistry/tests/compute/gfql/test_row_pipeline_ops.py \ graphistry/tests/compute/gfql/test_schema_changers.py \ graphistry/tests/compute/gfql/test_let_schema_changers.py \ + graphistry/tests/compute/gfql/test_rollout.py \ + graphistry/tests/compute/gfql/test_rollout_binder_integration.py \ graphistry/tests/compute/test_hop.py \ graphistry/tests/compute/test_chain.py \ graphistry/tests/compute/test_chain_let.py \ @@ -775,6 +777,19 @@ jobs: graphistry/tests/compute/test_dataframe_primitives.py \ tests/gfql/ref/ + - name: T5 rollout gate — execution-path canary (env=true) + # Default-loose path is exercised by the env-unset run above (T5 #1311). + # This step asserts the env-on canary path: the same rollout test slice + # run with GRAPHISTRY_GFQL_STRICT_SCHEMA=true to confirm strict-mode + # default behavior on the binder execution compile path. 
+ env: + GRAPHISTRY_GFQL_STRICT_SCHEMA: "true" + run: | + source pygraphistry/bin/activate + python -B -m pytest -vv \ + graphistry/tests/compute/gfql/test_rollout.py \ + graphistry/tests/compute/gfql/test_rollout_binder_integration.py + test-pandas-compat: name: test-pandas-compat (${{ matrix.label }}, py${{ matrix.python-version }}) @@ -889,6 +904,8 @@ jobs: graphistry/tests/compute/gfql/test_row_pipeline_ops.py \ graphistry/tests/compute/gfql/test_schema_changers.py \ graphistry/tests/compute/gfql/test_let_schema_changers.py \ + graphistry/tests/compute/gfql/test_rollout.py \ + graphistry/tests/compute/gfql/test_rollout_binder_integration.py \ graphistry/tests/compute/test_hop.py \ graphistry/tests/compute/test_chain.py \ graphistry/tests/compute/test_chain_let.py \ @@ -896,6 +913,17 @@ jobs: graphistry/tests/compute/test_dataframe_primitives.py \ tests/gfql/ref/ + - name: T5 rollout gate — execution-path canary (env=true) + # Default-loose path is exercised by the env-unset run above (T5 #1311). + # This step asserts the env-on canary path on pandas-compat matrix. + env: + GRAPHISTRY_GFQL_STRICT_SCHEMA: "true" + run: | + source pygraphistry/bin/activate + python -B -m pytest -vv \ + graphistry/tests/compute/gfql/test_rollout.py \ + graphistry/tests/compute/gfql/test_rollout_binder_integration.py + test-core-python: needs: [ test-minimal-python, test-gfql-core, generate-lockfiles ] diff --git a/CHANGELOG.md b/CHANGELOG.md index 508eeac5bc..0dd67fd512 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - **GFQL / Cypher reentry follow-through cleanup (#989, post-#1260 extraction)**: In `graphistry/compute/gfql/cypher/reentry/runtime.py`, free-form intermediate MATCH plan construction now routes through the whole-row/free-form `ReentryPlan` contract instead of scalar-only fallback tagging. 
This makes the dedicated runtime `plan.free_form` lane reachable again and removes incidental scalar-only-path dependence for free-form reentry dispatch. - **GFQL native types T4 — Arrow/type bridge contracts and coercion semantics (#1312, #1262, #1046)**: Added `graphistry/compute/gfql/ir/arrow_bridge.py` with stable schema-level interchange helpers `to_arrow()` and `from_arrow()` for `RowSchema` + schema-confidence metadata. The bridge records per-field logical-type metadata (`gfql.logical_type`) and confidence (`gfql.schema_confidence`) for deterministic round-trips, supports strict vs widening coercion (`coercion='strict'|'widen'`) at export/import boundaries, preserves scalar nullability exactly, and defines structural-type fallback behavior (`NodeRef`/`EdgeRef`/`PathType` as widened string bridge fields in widen mode). Added focused regression coverage in `graphistry/tests/compute/gfql/test_ir_arrow_bridge.py` for round-trip fidelity, nullability behavior, confidence handling, and strict/widen coercion boundaries. - **GFQL type-system T3.b — nullable-helper consolidation follow-through (#1309, #1304 audit continuation under #1262/#1046)**: Migrated the two deferred nullable call sites onto `ir.metadata` helpers: binder UNION branch merge now derives `nullable` via `bound_variable_is_nullable(existing|branch_var)` instead of direct field OR, and lowering bound-nullable alias derivation now uses `bound_variable_is_nullable(variable)` instead of the inline `nullable or bool(null_extended_from)` flatten. Converted the IC1 differential scaffold trust-check to the same helper contract and added focused binder regression coverage (`RETURN null AS x UNION RETURN 2 AS x`) locking nullable-bit preservation across UNION branch merge. 
+- **GFQL type-system T5 — rollout-gate canary for the binder execution path (#1311, #1262 T5)**: Added `graphistry/compute/gfql/rollout.py` with a stable env-driven canary contract (`STRICT_SCHEMA_ENV = "GRAPHISTRY_GFQL_STRICT_SCHEMA"`, `env_bool`, `strict_schema_env_default`, `resolve_strict_schema`) and re-exports from `graphistry.compute.gfql`. Wired the resolver into `binder._strict_schema_mode` so the strict/permissive precedence on the *execution* compile path is now: explicit caller param > catalog `metadata["strict"]` > env-default > loose. Default off — existing loose-mode callers see no behavior change. **Scope:** T5 is complementary to the explicit preflight surface added in #1320/#1321 (`g.gfql_validate(strict=True)`, `g.gfql(..., validate=True)`) which hardcode strict at preflight. T5's env-gate exclusively governs the *execution* compile path's binder default — i.e., what `g.gfql(query)` does when the caller doesn't opt into explicit preflight. This is the canary surface for org-wide rollout. Added focused regression coverage in `graphistry/tests/compute/gfql/test_rollout.py` (39 tests across env-bool truth set, env-default semantics, resolver precedence, and package re-export pin) and `test_rollout_binder_integration.py` (12 tests pinning the binder seam: env-off preserves loose behavior, env-on rejects unknown labels, explicit/catalog tiers still win, valid queries continue to pass under env=strict). Both rollout test files were folded into the existing `test-gfql-core` and `test-pandas-compat-gfql` CI lanes — default-loose path runs with the main pytest invocation, env-on path runs as a follow-on `GRAPHISTRY_GFQL_STRICT_SCHEMA=true` step in the same job (so the canary receipt rides existing matrix axes rather than a new standalone lane). 
Operator-guidance doc landed at `docs/source/gfql/strict_mode.rst` (added to user-guide toctree) leading with the #1320 preflight API as the explicit operator entrypoint and framing the env-gate as the execution-path canary. With T3.b (#1309), T4 (#1313), and T5 (this) all landed, meta-issue #1262 close criteria are now satisfied. ## [0.55.0 - 2026-05-05] diff --git a/docs/source/gfql/index.rst b/docs/source/gfql/index.rst index cdb4872b9e..ad0fa359af 100644 --- a/docs/source/gfql/index.rst +++ b/docs/source/gfql/index.rst @@ -62,6 +62,7 @@ See also: datetime_filtering builtin_calls policy + strict_mode wire_protocol_examples .. toctree:: diff --git a/docs/source/gfql/strict_mode.rst b/docs/source/gfql/strict_mode.rst new file mode 100644 index 0000000000..667908e101 --- /dev/null +++ b/docs/source/gfql/strict_mode.rst @@ -0,0 +1,215 @@ +Strict Schema Mode +================== + +GFQL ships with an opt-in **strict schema mode** that rejects Cypher queries +referencing labels or properties absent from the bound graph schema. The +default loose mode admits unknown names so today's exploratory and +partially-typed workflows keep working unchanged. + +GFQL exposes strict mode through two complementary surfaces: + +1. **Explicit preflight** — ``g.gfql_validate(...)`` + and ``g.gfql(..., validate=True)`` — + the **primary operator entrypoint** for explicit, predictable, fail-fast + schema checks. See :doc:`validation/fundamentals` and :doc:`cypher`. +2. **Execution-path rollout gate** — environment variable / catalog metadata + precedence ladder governing the default for non-validate-flagged + ``g.gfql()`` + execution. **This is a canary surface for staged organisation-wide + adoption.** This page is its operator reference. + +What strict mode covers +----------------------- + +When enabled, the Cypher binder enforces: + +* MATCH labels exist in the catalog's node label set. 
+* WHERE / RETURN / UNWIND / CALL property references exist for the relevant + alias's node or edge column set. + +Strict mode is purely a binder gate — it raises ``GFQLValidationError`` (with +``ErrorCode`` in the ``E10x`` / ``E30x`` families) before any execution. There +is no runtime cost in loose mode; there is no behavior difference for valid +queries between modes. + +It does **not** cover dataframe-side per-row type checks. Arrow/type-bridge +coercion semantics are handled by ``graphistry.compute.gfql.ir.arrow_bridge`` +(landed under #1312); rollout controls for that surface are not part of this +page. + +The two surfaces in detail +-------------------------- + +Explicit preflight (the primary operator entrypoint) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For most operator workflows, prefer the explicit preflight API. It returns +structured diagnostics and never executes query operators: + +.. doc-test: skip + +.. code-block:: python + + report = g.gfql_validate( + "MATCH (p:Person) RETURN p.name AS name", + strict=True, + ) + if not report["ok"]: + for diag in report["diagnostics"]: + print(diag["code"], diag["message"]) + +For execution guarded by a preflight check, use the ``validate=True`` flag +on ``g.gfql(...)`` (which runs the same preflight in strict mode before +executing): + +.. doc-test: skip + +.. code-block:: python + + result = g.gfql( + "MATCH (p:Person) RETURN p.name AS name", + validate=True, + ) + +These surfaces are predictable and not influenced by environment variables — +they always run strict checks when invoked, and they are the right tool for +explicit per-call enforcement (request handlers, notebooks, CI gates). 
+ +Execution-path rollout gate (this page's primary topic) +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +For rollout scenarios where you want to flip strict-mode default behavior +across an environment **without modifying every call site**, GFQL exposes a +three-tier precedence ladder for the binder default used by +``g.gfql(query)`` (i.e., the path with ``validate=False``, the default): + +1. **Explicit binder parameter** — the strongest signal. + +.. doc-test: skip + +.. code-block:: python + + from graphistry.compute.gfql.frontends.cypher.binder import FrontendBinder + FrontendBinder().bind(ast, ctx, strict_name_resolution=True) + +This is rarely useful directly — most callers reach the binder via +``g.gfql(query)`` rather than constructing it themselves. + +2. **Catalog metadata flag** — pinned per dataset. + +.. doc-test: skip + +.. code-block:: python + + from graphistry.compute.gfql.ir.compilation import GraphSchemaCatalog + catalog = GraphSchemaCatalog.from_schema_parts( + node_columns={"id", "label__Person"}, + edge_columns={"src", "dst", "label__KNOWS"}, + metadata={"strict": True}, + ) + +3. **Process-wide environment variable** — the canary toggle. + +.. code-block:: bash + + export GRAPHISTRY_GFQL_STRICT_SCHEMA=true + +Truthy values: ``1``, ``true``, ``yes``, ``on`` (case-insensitive). +Falsy / unset: anything else (default ``false``). + +When any tier opts in, strict applies. Monotonic widening: + +* Explicit param ``True`` → strict. +* Catalog ``metadata["strict"]=True`` → strict. +* Env ``GRAPHISTRY_GFQL_STRICT_SCHEMA=true`` → strict. +* Otherwise → loose. + +An explicit ``False`` is treated as *no preference* — it does not force loose +mode when the catalog or env elects strict. To force loose, do not set any of +the three opt-ins. + +**Important scoping note:** this precedence ladder governs the binder default +on the *execution* compile path. 
The explicit preflight API +(``g.gfql_validate(strict=True)``, ``g.gfql(validate=True)``) is unaffected +by these tiers — it always runs strict checks when invoked. The two surfaces +are independent on purpose: explicit preflight for predictable per-call +diagnostics, environment ladder for organization-wide canary rollout. + +Diagnostic shape +---------------- + +Strict-mode rejections raise ``GFQLValidationError`` with deterministic +messages and sorted availability hints: + +.. code-block:: text + + Cypher label is missing from strict binder schema catalog. + Use labels that exist in the node schema or disable strict mode. + available labels: [Comment, Person, Post] + +Use the message text to identify the gap, then either fix the query, extend +the catalog, or temporarily disable strict mode while iterating. + +When to enable +-------------- + +Recommended: + +* Production query gates where unknown identifiers should fail closed. +* CI / pre-merge quality bars over a curated catalog. +* Multi-team environments where catalog ownership is centralized. + +Keep loose: + +* Exploratory / notebook usage where the schema is being discovered. +* Pipelines where catalogs may be partial by design (for example, post-ingest + before label propagation finalises). + +Recommended rollout sequence +---------------------------- + +Stage adoption from the least invasive control to the most specific: + +1. **Canary via environment variable** — set + ``GRAPHISTRY_GFQL_STRICT_SCHEMA=true`` in a non-production shadow + environment. Watch validation diagnostics from any ``g.gfql(query)`` calls + that previously ran loose. Catch and triage queries that newly reject. +2. **Per-dataset opt-in via catalog metadata** — once the canary surface is + clean, enable ``metadata={"strict": True}`` on the catalogs that should + fail closed in production. This pins behavior independently of the env. +3. 
**Per-call enforcement via explicit preflight** — for the tightest path + (for example a request handler that should never accept unknown + identifiers), prefer the explicit preflight surface: + +.. doc-test: skip + +.. code-block:: python + + result = g.gfql(query, validate=True) + +This is more readable than the binder param and runs structured diagnostics. +Use it for code that wants strict regardless of catalog or env. + +Rolling back the rollout gate is always safe: clear the env var or remove the +catalog flag; loose mode returns immediately on the next bind. The explicit +preflight surface is unaffected by either. + +Related lanes +------------- + +* T1 (#1296) — schema catalog contract. +* T2 (#1302) — added the binder-time strict checks themselves. +* T3 (#1300) — type/nullability metadata propagation contract. +* T3.b (#1309) — nullable-helper consolidation follow-through. +* T4 (#1313) — Arrow/type-bridge contract surface. +* T5 (#1311) — this page; rollout / docs / CI receipts for staged adoption. +* #1320 / #1321 — explicit preflight API (``g.gfql_validate``, + ``g.gfql(validate=True)``) — the primary operator entrypoint for explicit + strict-mode invocation. + +See also +-------- + +* :doc:`validation/fundamentals` — preflight + execution-time validation + primitives, including ``g.gfql_validate(...)``. +* :doc:`cypher` — Cypher syntax reference and preflight examples. 
diff --git a/graphistry/compute/gfql/__init__.py b/graphistry/compute/gfql/__init__.py index 1df331d90f..072ee2185e 100644 --- a/graphistry/compute/gfql/__init__.py +++ b/graphistry/compute/gfql/__init__.py @@ -21,25 +21,38 @@ GFQLColumnNotFoundError ) +from graphistry.compute.gfql.rollout import ( + STRICT_SCHEMA_ENV, + env_bool, + resolve_strict_schema, + strict_schema_env_default, +) + __all__ = [ # Validation classes 'ValidationIssue', 'Schema', - + # Validation functions 'validate_syntax', - 'validate_schema', + 'validate_schema', 'validate_query', 'extract_schema', 'extract_schema_from_dataframes', 'format_validation_errors', 'suggest_fixes', - + # Exceptions 'GFQLException', 'GFQLValidationError', 'GFQLSyntaxError', 'GFQLSchemaError', 'GFQLTypeError', - 'GFQLColumnNotFoundError' + 'GFQLColumnNotFoundError', + + # Rollout gates (T5 #1311) + 'STRICT_SCHEMA_ENV', + 'env_bool', + 'resolve_strict_schema', + 'strict_schema_env_default', ] diff --git a/graphistry/compute/gfql/frontends/cypher/binder.py b/graphistry/compute/gfql/frontends/cypher/binder.py index 6487031f69..2880f2e268 100644 --- a/graphistry/compute/gfql/frontends/cypher/binder.py +++ b/graphistry/compute/gfql/frontends/cypher/binder.py @@ -39,6 +39,7 @@ from graphistry.compute.gfql.ir.metadata import bound_variable_is_nullable from graphistry.compute.gfql.ir.logical_plan import RowSchema from graphistry.compute.gfql.ir.types import BoundPredicate, EdgeRef, ListType, LogicalType, NodeRef, PathType, ScalarType +from graphistry.compute.gfql.rollout import resolve_strict_schema CypherAST = Union[CypherQuery, CypherUnionQuery, CypherGraphQuery] SchemaConfidence = Literal["declared", "propagated", "inferred"] @@ -1411,10 +1412,13 @@ def _looks_like_list_literal(text: str) -> bool: def _strict_schema_mode(state: _BindState) -> bool: - if state.strict_name_resolution: - return True - strict_flag = state.catalog.metadata.get("strict") - return bool(strict_flag) + catalog_strict = 
state.catalog.metadata.get("strict") + return bool( + resolve_strict_schema( + explicit=state.strict_name_resolution, + catalog_strict=bool(catalog_strict) if catalog_strict is not None else None, + ) + ) def _catalog_node_labels(catalog: GraphSchemaCatalog) -> Tuple[str, ...]: diff --git a/graphistry/compute/gfql/rollout.py b/graphistry/compute/gfql/rollout.py new file mode 100644 index 0000000000..e51a302c81 --- /dev/null +++ b/graphistry/compute/gfql/rollout.py @@ -0,0 +1,74 @@ +"""GFQL rollout gates: env-driven canary toggles for staged adoption. + +Stable contract surface for T5 (#1311) under #1262 / #1046. + +Today this module gates one knob — strict schema validation in the Cypher +binder (T2 #1302). Helpers and the resolver are written so additional +rollout gates can be added without re-shaping callers. + +Precedence (checked in order; the first tier that opts in to strict wins, and a falsy tier never forces loose): + 1. Explicit caller parameter (e.g. ``FrontendBinder.bind(strict_name_resolution=True)``) + 2. Catalog-level metadata flag (e.g. ``GraphSchemaCatalog.metadata['strict']``) + 3. Process-wide env default (e.g. ``GRAPHISTRY_GFQL_STRICT_SCHEMA``) + 4. Loose default + +The env tier is for canary / gradual rollout; default-off so existing +loose-mode callers see no behavior change. + +Production callers: + ``graphistry/compute/gfql/frontends/cypher/binder.py:_strict_schema_mode`` +""" + +from __future__ import annotations + +import os +from typing import Optional + +__all__ = [ + "STRICT_SCHEMA_ENV", + "ENV_TRUTHY", + "ENV_FALSY", + "env_bool", + "strict_schema_env_default", + "resolve_strict_schema", +] + +STRICT_SCHEMA_ENV: str = "GRAPHISTRY_GFQL_STRICT_SCHEMA" + +ENV_TRUTHY: "frozenset[str]" = frozenset({"1", "true", "yes", "on"}) +ENV_FALSY: "frozenset[str]" = frozenset({"0", "false", "no", "off"}) + + +def env_bool(name: str, default: bool = False) -> bool: + """Read a boolean env var. 
Unset / empty / unrecognized -> ``default``.""" + raw = os.environ.get(name, "").strip().lower() + if not raw: + return default + if raw in ENV_TRUTHY: + return True + if raw in ENV_FALSY: + return False + return default + + +def strict_schema_env_default() -> bool: + """Return the env-default for strict schema mode (default off).""" + return env_bool(STRICT_SCHEMA_ENV, default=False) + + +def resolve_strict_schema( + *, + explicit: bool, + catalog_strict: Optional[bool], +) -> bool: + """Apply strict-schema precedence: explicit > catalog > env > loose. + + A monotonic widening: any tier that asks for strict wins. Explicit + ``False`` does not force loose mode (callers passing the default cannot + override a catalog/env opt-in). + """ + if explicit: + return True + if catalog_strict: + return True + return strict_schema_env_default() diff --git a/graphistry/tests/compute/gfql/test_rollout.py b/graphistry/tests/compute/gfql/test_rollout.py new file mode 100644 index 0000000000..374fd7e238 --- /dev/null +++ b/graphistry/tests/compute/gfql/test_rollout.py @@ -0,0 +1,146 @@ +"""Tests for GFQL rollout gates (T5 #1311).""" + +from __future__ import annotations + +import pytest + +from graphistry.compute.gfql.rollout import ( + ENV_FALSY, + ENV_TRUTHY, + STRICT_SCHEMA_ENV, + env_bool, + resolve_strict_schema, + strict_schema_env_default, +) + + +class TestEnvBool: + def test_unset_returns_default_false(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("GRAPHISTRY_TEST_FLAG", raising=False) + assert env_bool("GRAPHISTRY_TEST_FLAG") is False + + def test_unset_returns_default_true(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("GRAPHISTRY_TEST_FLAG", raising=False) + assert env_bool("GRAPHISTRY_TEST_FLAG", default=True) is True + + @pytest.mark.parametrize("value", sorted(ENV_TRUTHY)) + def test_truthy_values(self, monkeypatch: pytest.MonkeyPatch, value: str) -> None: + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", value) + assert 
env_bool("GRAPHISTRY_TEST_FLAG") is True + + @pytest.mark.parametrize("value", sorted(ENV_FALSY)) + def test_falsy_values(self, monkeypatch: pytest.MonkeyPatch, value: str) -> None: + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", value) + assert env_bool("GRAPHISTRY_TEST_FLAG", default=True) is False + + def test_case_insensitive(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", "TRUE") + assert env_bool("GRAPHISTRY_TEST_FLAG") is True + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", "Yes") + assert env_bool("GRAPHISTRY_TEST_FLAG") is True + + def test_whitespace_stripped(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", " true ") + assert env_bool("GRAPHISTRY_TEST_FLAG") is True + + def test_empty_returns_default(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", "") + assert env_bool("GRAPHISTRY_TEST_FLAG", default=True) is True + assert env_bool("GRAPHISTRY_TEST_FLAG", default=False) is False + + def test_unrecognized_returns_default(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("GRAPHISTRY_TEST_FLAG", "maybe") + assert env_bool("GRAPHISTRY_TEST_FLAG", default=False) is False + assert env_bool("GRAPHISTRY_TEST_FLAG", default=True) is True + + +class TestStrictSchemaEnvDefault: + def test_unset_is_loose(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv(STRICT_SCHEMA_ENV, raising=False) + assert strict_schema_env_default() is False + + def test_explicit_false_is_loose(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + assert strict_schema_env_default() is False + + def test_explicit_true_is_strict(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "true") + assert strict_schema_env_default() is True + + def test_env_name_constant(self) -> None: + # Pin the public env-var name; renaming would break operators. 
+ assert STRICT_SCHEMA_ENV == "GRAPHISTRY_GFQL_STRICT_SCHEMA" + + +class TestResolveStrictSchema: + def test_default_loose(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv(STRICT_SCHEMA_ENV, raising=False) + assert resolve_strict_schema(explicit=False, catalog_strict=None) is False + + def test_explicit_true_wins_over_unset_env(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv(STRICT_SCHEMA_ENV, raising=False) + assert resolve_strict_schema(explicit=True, catalog_strict=None) is True + + def test_explicit_true_wins_over_env_false(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + assert resolve_strict_schema(explicit=True, catalog_strict=False) is True + + def test_catalog_true_wins_over_env_false(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + assert resolve_strict_schema(explicit=False, catalog_strict=True) is True + + def test_env_true_with_no_explicit_or_catalog(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "true") + assert resolve_strict_schema(explicit=False, catalog_strict=None) is True + + def test_env_true_with_explicit_false_still_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + # Explicit False does NOT force loose — monotonic widening. + # Caller passing False is "no preference", not "force loose". + monkeypatch.setenv(STRICT_SCHEMA_ENV, "true") + assert resolve_strict_schema(explicit=False, catalog_strict=None) is True + + def test_env_true_with_catalog_false_still_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + # Catalog False is also "no opinion" today (consistent with current + # binder behavior where catalog.metadata.get('strict') returning a + # falsy value falls through). Monotonic. 
+ monkeypatch.setenv(STRICT_SCHEMA_ENV, "true") + assert resolve_strict_schema(explicit=False, catalog_strict=False) is True + + def test_all_loose(self, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + assert resolve_strict_schema(explicit=False, catalog_strict=False) is False + assert resolve_strict_schema(explicit=False, catalog_strict=None) is False + + @pytest.mark.parametrize("env_value", ["1", "true", "yes", "on", "TRUE", "Yes"]) + def test_truthy_env_values_strict( + self, monkeypatch: pytest.MonkeyPatch, env_value: str + ) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, env_value) + assert resolve_strict_schema(explicit=False, catalog_strict=None) is True + + @pytest.mark.parametrize("env_value", ["0", "false", "no", "off", "FALSE", ""]) + def test_falsy_env_values_loose( + self, monkeypatch: pytest.MonkeyPatch, env_value: str + ) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, env_value) + assert resolve_strict_schema(explicit=False, catalog_strict=None) is False + + +class TestPackageSurface: + """Pin re-exports so consumers wiring rollout knobs have a stable surface.""" + + def test_reexports_from_compute_gfql(self) -> None: + from graphistry.compute.gfql import ( + STRICT_SCHEMA_ENV as PKG_ENV, + env_bool as pkg_env_bool, + resolve_strict_schema as pkg_resolve, + strict_schema_env_default as pkg_default, + ) + assert PKG_ENV == STRICT_SCHEMA_ENV + assert pkg_env_bool is env_bool + assert pkg_resolve is resolve_strict_schema + assert pkg_default is strict_schema_env_default diff --git a/graphistry/tests/compute/gfql/test_rollout_binder_integration.py b/graphistry/tests/compute/gfql/test_rollout_binder_integration.py new file mode 100644 index 0000000000..72f0bb2daf --- /dev/null +++ b/graphistry/tests/compute/gfql/test_rollout_binder_integration.py @@ -0,0 +1,131 @@ +"""Binder-integration tests for T5 #1311 strict-schema env gate. 
+ +Pins the wiring between ``graphistry.compute.gfql.rollout`` and +``binder._strict_schema_mode`` so renames or rewrites at either end +break loudly. +""" + +from __future__ import annotations + +import pytest + +from graphistry.compute.gfql.cypher.parser import parse_cypher +from graphistry.compute.gfql.frontends.cypher.binder import FrontendBinder +from graphistry.compute.gfql.ir.compilation import GraphSchemaCatalog, PlanContext +from graphistry.compute.gfql.rollout import STRICT_SCHEMA_ENV +from graphistry.compute.exceptions import ErrorCode, GFQLValidationError + + +def _ctx_with_catalog(*, strict_metadata: bool = False) -> PlanContext: + catalog = GraphSchemaCatalog.from_schema_parts( + node_columns=frozenset({"id", "label__Person"}), + edge_columns=frozenset({"src", "dst", "label__KNOWS"}), + node_id_column="id", + edge_source_column="src", + edge_destination_column="dst", + metadata={"strict": True} if strict_metadata else {}, + ) + return PlanContext(catalog=catalog) + + +def _query_with_unknown_label() -> str: + return "MATCH (n:UnknownLabel) RETURN n" + + +class TestBinderEnvGateOff: + """Default behavior: env unset / false → loose mode (no behavior change).""" + + def test_env_unset_loose_accepts_unknown_label( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.delenv(STRICT_SCHEMA_ENV, raising=False) + ctx = _ctx_with_catalog() + ast = parse_cypher(_query_with_unknown_label()) + FrontendBinder().bind(ast, ctx) # no exception in loose mode + + def test_env_false_loose_accepts_unknown_label( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + ctx = _ctx_with_catalog() + ast = parse_cypher(_query_with_unknown_label()) + FrontendBinder().bind(ast, ctx) + + +class TestBinderEnvGateOn: + """Env=true elevates default to strict when caller did not opt in explicitly.""" + + def test_env_true_rejects_unknown_label( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + 
monkeypatch.setenv(STRICT_SCHEMA_ENV, "true") + ctx = _ctx_with_catalog() + ast = parse_cypher(_query_with_unknown_label()) + with pytest.raises(GFQLValidationError) as excinfo: + FrontendBinder().bind(ast, ctx) + # Check this is a strict-mode diagnostic (label-missing class). + assert excinfo.value.code in { + ErrorCode.E301, + ErrorCode.E302, + ErrorCode.E103, + } + + @pytest.mark.parametrize("env_value", ["1", "yes", "on", "TRUE"]) + def test_env_truthy_variants_rejects( + self, monkeypatch: pytest.MonkeyPatch, env_value: str + ) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, env_value) + ctx = _ctx_with_catalog() + ast = parse_cypher(_query_with_unknown_label()) + with pytest.raises(GFQLValidationError): + FrontendBinder().bind(ast, ctx) + + +class TestBinderPrecedence: + """Explicit param + catalog flag still win regardless of env state.""" + + def test_explicit_true_with_env_unset_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.delenv(STRICT_SCHEMA_ENV, raising=False) + ctx = _ctx_with_catalog() + ast = parse_cypher(_query_with_unknown_label()) + with pytest.raises(GFQLValidationError): + FrontendBinder().bind(ast, ctx, strict_name_resolution=True) + + def test_explicit_true_with_env_false_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + ctx = _ctx_with_catalog() + ast = parse_cypher(_query_with_unknown_label()) + with pytest.raises(GFQLValidationError): + FrontendBinder().bind(ast, ctx, strict_name_resolution=True) + + def test_catalog_strict_with_env_unset_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + monkeypatch.delenv(STRICT_SCHEMA_ENV, raising=False) + ctx = _ctx_with_catalog(strict_metadata=True) + ast = parse_cypher(_query_with_unknown_label()) + with pytest.raises(GFQLValidationError): + FrontendBinder().bind(ast, ctx) + + def test_catalog_strict_with_env_false_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + 
monkeypatch.setenv(STRICT_SCHEMA_ENV, "false") + ctx = _ctx_with_catalog(strict_metadata=True) + ast = parse_cypher(_query_with_unknown_label()) + with pytest.raises(GFQLValidationError): + FrontendBinder().bind(ast, ctx) + + def test_known_label_passes_under_env_strict( + self, monkeypatch: pytest.MonkeyPatch + ) -> None: + # Strict mode should not break valid queries — this is the canary + # safety net: enabling env=true must not break TCK-passing queries. + monkeypatch.setenv(STRICT_SCHEMA_ENV, "true") + ctx = _ctx_with_catalog() + ast = parse_cypher("MATCH (n:Person) RETURN n") + FrontendBinder().bind(ast, ctx)