From 1c8ca7e86d0ca150ada45684e0686e256b3e707a Mon Sep 17 00:00:00 2001
From: Ben King <9087625+benfdking@users.noreply.github.com>
Date: Sun, 12 Apr 2026 17:40:37 +0100
Subject: [PATCH] test: add validation that all dialect Ref/keyword references resolve

Walks every grammar entry in every dialect and checks that all Ref
references (including keywords, bracket pairs, terminators, excludes,
and delimiters) point to names that actually exist in the dialect's
library. Currently finds 263 unresolved references across 8 dialects to
be fixed incrementally.

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 crates/lib-core/src/dialects.rs               |   5 +
 crates/lib-core/src/parser/grammar.rs         |   6 +-
 .../lib-core/src/parser/grammar/delimited.rs  |   2 +-
 .../lib-dialects/tests/validate_references.rs | 163 ++++++++++++++++++
 4 files changed, 172 insertions(+), 4 deletions(-)
 create mode 100644 crates/lib-dialects/tests/validate_references.rs

diff --git a/crates/lib-core/src/dialects.rs b/crates/lib-core/src/dialects.rs
index 725c9888c..cbd555c28 100644
--- a/crates/lib-core/src/dialects.rs
+++ b/crates/lib-core/src/dialects.rs
@@ -261,6 +261,11 @@ impl Dialect {
     pub fn lexer(&self) -> &Lexer {
         self.lexer.as_ref().unwrap()
     }
+
+    /// Returns an iterator over all names registered in the dialect's library.
+    pub fn library_names(&self) -> impl Iterator<Item = &str> {
+        self.library.keys().map(|k| k.as_ref())
+    }
 }
 
 pub type BracketPair = (&'static str, &'static str, &'static str, bool);
diff --git a/crates/lib-core/src/parser/grammar.rs b/crates/lib-core/src/parser/grammar.rs
index 56841011a..daa0d78e7 100644
--- a/crates/lib-core/src/parser/grammar.rs
+++ b/crates/lib-core/src/parser/grammar.rs
@@ -21,9 +21,9 @@ use crate::parser::segments::ErasedSegment;
 
 #[derive(Clone)]
 pub struct Ref {
-    pub(crate) reference: Cow<'static, str>,
+    pub reference: Cow<'static, str>,
     pub exclude: Option<Matchable>,
-    terminators: Vec<Matchable>,
+    pub terminators: Vec<Matchable>,
     reset_terminators: bool,
     pub(crate) allow_gaps: bool,
     pub(crate) optional: bool,
@@ -176,7 +176,7 @@ impl MatchableTrait for Ref {
 #[derive(Clone, Debug)]
 pub struct Anything {
     cache_key: MatchableCacheKey,
-    terminators: Vec<Matchable>,
+    pub terminators: Vec<Matchable>,
 }
 
 impl PartialEq for Anything {
diff --git a/crates/lib-core/src/parser/grammar/delimited.rs b/crates/lib-core/src/parser/grammar/delimited.rs
index 3fa48b4f3..27b4bdef2 100644
--- a/crates/lib-core/src/parser/grammar/delimited.rs
+++ b/crates/lib-core/src/parser/grammar/delimited.rs
@@ -24,7 +24,7 @@ use crate::parser::segments::ErasedSegment;
 pub struct Delimited {
     pub base: AnyNumberOf,
     pub allow_trailing: bool,
-    pub(crate) delimiter: Matchable,
+    pub delimiter: Matchable,
     pub min_delimiters: usize,
     pub optional_delimiter: bool,
     optional: bool,
diff --git a/crates/lib-dialects/tests/validate_references.rs b/crates/lib-dialects/tests/validate_references.rs
new file mode 100644
index 000000000..96e1abc09
--- /dev/null
+++ b/crates/lib-dialects/tests/validate_references.rs
@@ -0,0 +1,163 @@
+use std::collections::BTreeSet;
+use std::ops::Deref;
+
+use sqruff_lib_core::dialects::Dialect;
+use sqruff_lib_core::dialects::init::DialectKind;
+use sqruff_lib_core::parser::matchable::{Matchable, MatchableTraitImpl, MatchableTrait};
+use sqruff_lib_dialects::kind_to_dialect;
+use strum::IntoEnumIterator;
+
+/// Recursively walk a Matchable grammar tree and collect all `Ref` reference names.
+fn collect_refs(matchable: &Matchable, dialect: &Dialect, refs: &mut BTreeSet<String>) {
+    match matchable.deref() {
+        MatchableTraitImpl::Ref(r) => {
+            refs.insert(r.reference.to_string());
+            if let Some(exclude) = &r.exclude {
+                collect_refs(exclude, dialect, refs);
+            }
+            for elem in &r.terminators {
+                collect_refs(elem, dialect, refs);
+            }
+        }
+        MatchableTraitImpl::NodeMatcher(nm) => {
+            // Trigger lazy grammar initialization and walk it.
+            let grammar = nm.match_grammar(dialect);
+            collect_refs(&grammar, dialect, refs);
+        }
+        MatchableTraitImpl::Sequence(seq) => {
+            for elem in &seq.terminators {
+                collect_refs(elem, dialect, refs);
+            }
+            for elem in seq.elements() {
+                collect_refs(elem, dialect, refs);
+            }
+        }
+        MatchableTraitImpl::Bracketed(br) => {
+            for elem in &br.this.terminators {
+                collect_refs(elem, dialect, refs);
+            }
+            for elem in br.this.elements() {
+                collect_refs(elem, dialect, refs);
+            }
+        }
+        MatchableTraitImpl::AnyNumberOf(any) => {
+            if let Some(exclude) = &any.exclude {
+                collect_refs(exclude, dialect, refs);
+            }
+            for elem in &any.terminators {
+                collect_refs(elem, dialect, refs);
+            }
+            for elem in any.elements() {
+                collect_refs(elem, dialect, refs);
+            }
+        }
+        MatchableTraitImpl::Delimited(del) => {
+            collect_refs(&del.delimiter, dialect, refs);
+            if let Some(exclude) = &del.base.exclude {
+                collect_refs(exclude, dialect, refs);
+            }
+            for elem in &del.base.terminators {
+                collect_refs(elem, dialect, refs);
+            }
+            for elem in del.base.elements() {
+                collect_refs(elem, dialect, refs);
+            }
+        }
+        MatchableTraitImpl::Anything(any) => {
+            for elem in &any.terminators {
+                collect_refs(elem, dialect, refs);
+            }
+        }
+        MatchableTraitImpl::Conditional(_) => {}
+        // Leaf nodes with no sub-matchable references:
+        MatchableTraitImpl::StringParser(_)
+        | MatchableTraitImpl::TypedParser(_)
+        | MatchableTraitImpl::CodeParser(_)
+        | MatchableTraitImpl::MultiStringParser(_)
+        | MatchableTraitImpl::RegexParser(_)
+        | MatchableTraitImpl::MetaSegment(_)
+        | MatchableTraitImpl::NonCodeMatcher(_)
+        | MatchableTraitImpl::Nothing(_)
+        | MatchableTraitImpl::BracketedSegmentMatcher(_)
+        | MatchableTraitImpl::LookaheadExclude(_) => {}
+    }
+}
+
+/// Collect all bracket pair segment references from a dialect.
+fn collect_bracket_refs(dialect: &Dialect) -> BTreeSet<String> {
+    let mut refs = BTreeSet::new();
+    for set_name in ["bracket_pairs", "angle_bracket_pairs"] {
+        for (_bracket_type, start_ref, end_ref, _persists) in dialect.bracket_sets(set_name) {
+            refs.insert(start_ref.to_string());
+            refs.insert(end_ref.to_string());
+        }
+    }
+    refs
+}
+
+#[test]
+fn all_dialect_references_resolve() {
+    let mut failures = Vec::new();
+
+    for kind in DialectKind::iter() {
+        let Some(dialect) = kind_to_dialect(&kind, None) else {
+            continue;
+        };
+
+        let library_names: BTreeSet<String> = dialect.library_names().map(String::from).collect();
+
+        // Collect all Ref references by walking every library entry.
+        // Use a visited set to avoid infinite recursion through NodeMatcher cycles.
+        let mut all_refs = BTreeSet::new();
+        let mut visited = BTreeSet::new();
+
+        fn walk_entry(
+            name: &str,
+            dialect: &Dialect,
+            library_names: &BTreeSet<String>,
+            all_refs: &mut BTreeSet<String>,
+            visited: &mut BTreeSet<String>,
+        ) {
+            if !visited.insert(name.to_string()) {
+                return;
+            }
+            if !library_names.contains(name) {
+                return;
+            }
+            let matchable = dialect.r#ref(name);
+            let mut entry_refs = BTreeSet::new();
+            collect_refs(&matchable, dialect, &mut entry_refs);
+            for ref_name in &entry_refs {
+                walk_entry(ref_name, dialect, library_names, all_refs, visited);
+            }
+            all_refs.extend(entry_refs);
+        }
+
+        for name in &library_names {
+            walk_entry(name, &dialect, &library_names, &mut all_refs, &mut visited);
+        }
+
+        // Also collect bracket pair references.
+        let bracket_refs = collect_bracket_refs(&dialect);
+        all_refs.extend(bracket_refs);
+
+        // Check every referenced name exists in the library.
+        for ref_name in &all_refs {
+            if !library_names.contains(ref_name) {
+                failures.push(format!(
+                    "Dialect {}: Ref '{}' not found in library",
+                    kind.name(),
+                    ref_name,
+                ));
+            }
+        }
+    }
+
+    if !failures.is_empty() {
+        panic!(
+            "Found {} unresolved references:\n{}",
+            failures.len(),
+            failures.join("\n")
+        );
+    }
+}