diff --git a/AGENTS.md b/AGENTS.md index 140b7883..29c0847a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -154,7 +154,7 @@ Tree-sitter: `((a) (b))` — Plotnik: `{(a) (b)}`. The #1 syntax error. ``` crates/ plotnik-cli/ # CLI tool - src/commands/ # Subcommands (debug, docs, exec, langs, types) + src/commands/ # Subcommands (debug, exec, langs, types) plotnik-core/ # Common code plotnik-lib/ # Plotnik as library src/ @@ -172,23 +172,44 @@ docs/ Run: `cargo run -p plotnik-cli -- ` -| Command | Purpose | -| ------- | ------------------------------- | -| `debug` | Inspect queries and source ASTs | -| `exec` | Execute query, output JSON | -| `types` | Generate TypeScript types | -| `langs` | List supported languages | +| Command | Purpose | Status | +| ------- | ------------------------------- | ------- | +| `debug` | Inspect queries and source ASTs | Working | +| `types` | Generate TypeScript types | Working | +| `langs` | List supported languages | Working | +| `exec` | Execute query, output JSON | Not yet | -Common: `-q/--query `, `--query-file `, `--source `, `-s/--source-file `, `-l/--lang ` +## debug -`debug`: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--graph`, `--graph-raw`, `--types` -`exec`: `--pretty`, `--verbose-nodes`, `--check`, `--entry ` -`types`: `--format `, `--root-type `, `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` +Inspect query AST/CST or parse source files with tree-sitter. 
```sh -cargo run -p plotnik-cli -- debug -q '(identifier) @id' --graph -l javascript -cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --pretty -cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript -o types.d.ts +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --only-symbols +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --types +cargo run -p plotnik-cli -- debug -s app.ts +cargo run -p plotnik-cli -- debug -s app.ts --raw +``` + +Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types` + +## types + +Generate TypeScript type definitions from a query. Requires `-l/--lang` to validate node types against grammar. + +```sh +cargo run -p plotnik-cli -- types -q 'Test = (identifier) @id' -l javascript +cargo run -p plotnik-cli -- types --query-file query.ptk -l typescript -o types.d.ts +``` + +Options: `--root-type `, `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` + +## langs + +List supported tree-sitter languages. 
+ +```sh +cargo run -p plotnik-cli -- langs ``` # Coding Rules diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 5251720d..be2863c7 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -40,20 +40,13 @@ pub fn run(args: DebugArgs) { None }; - let mut query = query_source.as_ref().map(|src| { + let query = query_source.as_ref().map(|src| { Query::try_from(src.as_str()).unwrap_or_else(|e| { eprintln!("error: {}", e); std::process::exit(1); }) }); - // Auto-link when --lang is provided with a query - if args.lang.is_some() - && let Some(ref mut _q) = query - { - unimplemented!(); - } - let show_query = has_query_input && !args.symbols && !args.graph && !args.types; let show_source = has_source_input; @@ -81,11 +74,17 @@ pub fn run(args: DebugArgs) { ); } - // Build graph if needed for --graph, --graph-raw, or --types - if (args.graph || args.graph_raw || args.types) - && let Some(_) = query.take() + if args.graph || args.graph_raw { + eprintln!("error: --graph and --graph-raw are not yet implemented"); + std::process::exit(1); + } + + if args.types + && let Some(ref q) = query { - unimplemented!(); + let output = + plotnik_lib::query::type_check::emit_typescript(q.type_context(), q.interner()); + print!("{}", output); } if show_source { diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs index d791e908..d99c8787 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -52,7 +52,9 @@ pub fn run(args: ExecArgs) { let _ = (args.pretty, args.verbose_nodes, args.check, args.entry); - todo!("IR emission and query execution not yet implemented") + eprintln!("error: query execution not yet implemented"); + eprintln!("hint: use `plotnik types` to generate TypeScript types from queries"); + std::process::exit(1); } fn load_query(args: &ExecArgs) -> String { diff --git 
a/crates/plotnik-cli/src/commands/types.rs b/crates/plotnik-cli/src/commands/types.rs index 8259d255..a913812c 100644 --- a/crates/plotnik-cli/src/commands/types.rs +++ b/crates/plotnik-cli/src/commands/types.rs @@ -1,11 +1,10 @@ -#![allow(dead_code)] - use std::fs; -use std::io::{self, Read}; +use std::io::{self, Read, Write}; use std::path::PathBuf; use plotnik_langs::Lang; use plotnik_lib::Query; +use plotnik_lib::query::type_check::{EmitConfig, emit_typescript_with_config}; pub struct TypesArgs { pub query_text: Option, @@ -32,7 +31,7 @@ pub fn run(args: TypesArgs) { } let lang = resolve_lang_required(&args.lang); - // Parse and validate query + // Parse and analyze query let query = Query::try_from(query_source.as_str()) .unwrap_or_else(|e| { eprintln!("error: {}", e); @@ -45,13 +44,25 @@ pub fn run(args: TypesArgs) { std::process::exit(1); } - // Link query against language - if !query.is_valid() { - eprint!("{}", query.diagnostics().render(query.source_map())); - std::process::exit(1); - } + // Emit TypeScript types + let config = EmitConfig { + export: args.export, + emit_node_type: !args.no_node_type, + root_type_name: args.root_type, + verbose_nodes: args.verbose_nodes, + }; - unimplemented!(); + let output = emit_typescript_with_config(query.type_context(), query.interner(), config); + + // Write output + if let Some(ref path) = args.output { + fs::write(path, &output).unwrap_or_else(|e| { + eprintln!("error: failed to write {}: {}", path.display(), e); + std::process::exit(1); + }); + } else { + io::stdout().write_all(output.as_bytes()).unwrap(); + } } fn load_query(args: &TypesArgs) -> String { @@ -66,7 +77,10 @@ fn load_query(args: &TypesArgs) -> String { .expect("failed to read stdin"); return buf; } - return fs::read_to_string(path).expect("failed to read query file"); + return fs::read_to_string(path).unwrap_or_else(|e| { + eprintln!("error: failed to read query file: {}", e); + std::process::exit(1); + }); } unreachable!("validation ensures query 
input exists") } diff --git a/crates/plotnik-core/src/interner.rs b/crates/plotnik-core/src/interner.rs new file mode 100644 index 00000000..36ad50cd --- /dev/null +++ b/crates/plotnik-core/src/interner.rs @@ -0,0 +1,234 @@ +//! String interning for efficient string deduplication and comparison. +//! +//! Converts heap-allocated strings into cheap integer handles (`Symbol`). +//! Comparing two symbols is O(1) integer comparison. +//! +//! The interner can be serialized to a binary blob format for the compiled query. + +use std::collections::HashMap; + +/// A lightweight handle to an interned string. +/// +/// Comparing two symbols is O(1). Symbols are ordered by insertion order, +/// not lexicographically—use `Interner::resolve` if you need string ordering. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct Symbol(u32); + +impl Symbol { + /// Raw index for serialization/debugging. + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } + + /// Create a Symbol from a raw index. Use only for deserialization. + #[inline] + pub fn from_raw(index: u32) -> Self { + Self(index) + } +} + +impl PartialOrd for Symbol { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Symbol { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0) + } +} + +/// String interner. Deduplicates strings and returns cheap Symbol handles. +#[derive(Debug, Clone, Default)] +pub struct Interner { + /// Map from string to symbol for deduplication. + map: HashMap, + /// Storage for interned strings, indexed by Symbol. + strings: Vec, +} + +impl Interner { + pub fn new() -> Self { + Self::default() + } + + /// Intern a string, returning its Symbol. + /// If the string was already interned, returns the existing Symbol. 
+ pub fn intern(&mut self, s: &str) -> Symbol { + if let Some(&sym) = self.map.get(s) { + return sym; + } + + let sym = Symbol(self.strings.len() as u32); + self.strings.push(s.to_owned()); + self.map.insert(s.to_owned(), sym); + sym + } + + /// Intern an owned string, avoiding clone if not already present. + pub fn intern_owned(&mut self, s: String) -> Symbol { + if let Some(&sym) = self.map.get(&s) { + return sym; + } + + let sym = Symbol(self.strings.len() as u32); + self.strings.push(s.clone()); + self.map.insert(s, sym); + sym + } + + /// Resolve a Symbol back to its string. + /// + /// # Panics + /// Panics if the symbol was not created by this interner. + #[inline] + pub fn resolve(&self, sym: Symbol) -> &str { + &self.strings[sym.0 as usize] + } + + /// Try to resolve a Symbol, returning None if invalid. + #[inline] + pub fn try_resolve(&self, sym: Symbol) -> Option<&str> { + self.strings.get(sym.0 as usize).map(|s| s.as_str()) + } + + /// Number of interned strings. + #[inline] + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Whether the interner is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.strings.is_empty() + } + + /// Iterate over all interned strings with their symbols. + #[inline] + pub fn iter(&self) -> impl Iterator { + self.strings + .iter() + .enumerate() + .map(|(i, s)| (Symbol(i as u32), s.as_str())) + } + + /// Emit as binary format blob and offset table. + /// + /// Returns (concatenated UTF-8 bytes, offset for each string + sentinel). + /// The offsets array has `len() + 1` entries; the last is the total blob size. 
+ pub fn to_blob(&self) -> (Vec, Vec) { + let mut blob = Vec::new(); + let mut offsets = Vec::with_capacity(self.strings.len() + 1); + + for s in &self.strings { + offsets.push(blob.len() as u32); + blob.extend_from_slice(s.as_bytes()); + } + offsets.push(blob.len() as u32); // sentinel for length calculation + + (blob, offsets) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_deduplicates() { + let mut interner = Interner::new(); + + let a = interner.intern("foo"); + let b = interner.intern("foo"); + let c = interner.intern("bar"); + + assert_eq!(a, b); + assert_ne!(a, c); + assert_eq!(interner.len(), 2); + } + + #[test] + fn resolve_roundtrip() { + let mut interner = Interner::new(); + + let sym = interner.intern("hello"); + assert_eq!(interner.resolve(sym), "hello"); + } + + #[test] + fn intern_owned_avoids_clone_on_hit() { + let mut interner = Interner::new(); + + let a = interner.intern("test"); + let b = interner.intern_owned("test".to_string()); + + assert_eq!(a, b); + assert_eq!(interner.len(), 1); + } + + #[test] + fn symbols_are_copy() { + let mut interner = Interner::new(); + let sym = interner.intern("x"); + + let copy = sym; + assert_eq!(sym, copy); + } + + #[test] + fn symbol_ordering_is_insertion_order() { + let mut interner = Interner::new(); + + let z = interner.intern("z"); + let a = interner.intern("a"); + + // z was inserted first, so z < a by insertion order + assert!(z < a); + } + + #[test] + fn to_blob_produces_correct_format() { + let mut interner = Interner::new(); + interner.intern("id"); + interner.intern("foo"); + + let (blob, offsets) = interner.to_blob(); + + assert_eq!(blob, b"idfoo"); + assert_eq!(offsets, vec![0, 2, 5]); + + // Verify we can reconstruct strings + let s0 = &blob[offsets[0] as usize..offsets[1] as usize]; + let s1 = &blob[offsets[1] as usize..offsets[2] as usize]; + assert_eq!(s0, b"id"); + assert_eq!(s1, b"foo"); + } + + #[test] + fn to_blob_empty() { + let interner = Interner::new(); + let 
(blob, offsets) = interner.to_blob(); + + assert!(blob.is_empty()); + assert_eq!(offsets, vec![0]); // just the sentinel + } + + #[test] + fn iter_yields_all_strings() { + let mut interner = Interner::new(); + let a = interner.intern("alpha"); + let b = interner.intern("beta"); + + let items: Vec<_> = interner.iter().collect(); + assert_eq!(items, vec![(a, "alpha"), (b, "beta")]); + } + + #[test] + fn symbol_from_raw_roundtrip() { + let sym = Symbol::from_raw(42); + assert_eq!(sym.as_u32(), 42); + } +} diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index e3ab0a97..06bd615c 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -13,8 +13,11 @@ use std::collections::HashMap; use std::num::NonZeroU16; +mod interner; mod invariants; +pub use interner::{Interner, Symbol}; + /// Raw node definition from `node-types.json`. #[derive(Debug, Clone, serde::Deserialize)] pub struct RawNode { diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index d53e34e2..8c0a2956 100644 --- a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -67,6 +67,10 @@ pub enum DiagnosticKind { IncompatibleTypes, MultiCaptureQuantifierNoName, UnusedBranchLabels, + StrictDimensionalityViolation, + DuplicateCaptureInScope, + IncompatibleCaptureTypes, + IncompatibleStructShapes, // Link pass - grammar validation UnknownNodeType, @@ -182,6 +186,10 @@ impl DiagnosticKind { "quantified expression with multiple captures requires `@name`" } Self::UnusedBranchLabels => "branch labels have no effect without capture", + Self::StrictDimensionalityViolation => "quantifier requires row capture", + Self::DuplicateCaptureInScope => "duplicate capture in scope", + Self::IncompatibleCaptureTypes => "incompatible capture types", + Self::IncompatibleStructShapes => "incompatible struct shapes", // Link pass - grammar validation Self::UnknownNodeType => 
"unknown node type", @@ -211,6 +219,18 @@ impl DiagnosticKind { Self::UndefinedReference => "`{}` is not defined".to_string(), Self::IncompatibleTypes => "incompatible types: {}".to_string(), + // Type inference errors with context + Self::StrictDimensionalityViolation => "{}".to_string(), + Self::DuplicateCaptureInScope => { + "capture `@{}` already defined in this scope".to_string() + } + Self::IncompatibleCaptureTypes => { + "capture `@{}` has incompatible types across branches".to_string() + } + Self::IncompatibleStructShapes => { + "capture `@{}` has incompatible struct fields across branches".to_string() + } + // Link pass errors with context Self::UnknownNodeType => "`{}` is not a valid node type".to_string(), Self::UnknownField => "`{}` is not a valid field".to_string(), diff --git a/crates/plotnik-lib/src/query/dependencies.rs b/crates/plotnik-lib/src/query/dependencies.rs index 9a24369e..c00d8fc8 100644 --- a/crates/plotnik-lib/src/query/dependencies.rs +++ b/crates/plotnik-lib/src/query/dependencies.rs @@ -8,7 +8,10 @@ //! which is useful for passes that need to process dependencies before //! dependents (like type inference). +use std::collections::HashMap; + use indexmap::{IndexMap, IndexSet}; +use plotnik_core::{Interner, Symbol}; use super::source_map::SourceId; use rowan::TextRange; @@ -17,6 +20,7 @@ use crate::Diagnostics; use crate::diagnostics::DiagnosticKind; use crate::parser::{AnonymousNode, Def, Expr, NamedNode, Ref, Root, SeqExpr}; use crate::query::symbol_table::SymbolTable; +use crate::query::type_check::DefId; use crate::query::visitor::{Visitor, walk_expr}; /// Result of dependency analysis. @@ -29,14 +33,85 @@ pub struct DependencyAnalysis { /// - Definitions within an SCC are mutually recursive. /// - Every definition in the symbol table appears exactly once. pub sccs: Vec>, + + /// Maps definition name (Symbol) to its DefId. + name_to_def: HashMap, + + /// Maps DefId to definition name Symbol (indexed by DefId). 
+ def_names: Vec, +} + +impl DependencyAnalysis { + /// Get the DefId for a definition by Symbol. + pub fn def_id_by_symbol(&self, sym: Symbol) -> Option { + self.name_to_def.get(&sym).copied() + } + + /// Get the DefId for a definition name (requires interner for lookup). + pub fn def_id(&self, interner: &Interner, name: &str) -> Option { + // Linear scan - only used during analysis, not hot path + for (&sym, &def_id) in &self.name_to_def { + if interner.resolve(sym) == name { + return Some(def_id); + } + } + None + } + + /// Get the name Symbol for a DefId. + pub fn def_name_sym(&self, id: DefId) -> Symbol { + self.def_names[id.index()] + } + + /// Get the name string for a DefId. + pub fn def_name<'a>(&self, interner: &'a Interner, id: DefId) -> &'a str { + interner.resolve(self.def_names[id.index()]) + } + + /// Number of definitions. + pub fn def_count(&self) -> usize { + self.def_names.len() + } + + /// Get the def_names slice (for seeding TypeContext). + pub fn def_names(&self) -> &[Symbol] { + &self.def_names + } + + /// Get the name_to_def map (for seeding TypeContext). + pub fn name_to_def(&self) -> &HashMap { + &self.name_to_def + } } /// Analyze dependencies between definitions. /// -/// Returns the SCCs in reverse topological order. -pub fn analyze_dependencies(symbol_table: &SymbolTable) -> DependencyAnalysis { +/// Returns the SCCs in reverse topological order, with DefId mappings. +/// The interner is used to intern definition names as Symbols. 
+pub fn analyze_dependencies( + symbol_table: &SymbolTable, + interner: &mut Interner, +) -> DependencyAnalysis { let sccs = SccFinder::find(symbol_table); - DependencyAnalysis { sccs } + + // Assign DefIds in SCC order (leaves first, so dependencies get lower IDs) + let mut name_to_def = HashMap::new(); + let mut def_names = Vec::new(); + + for scc in &sccs { + for name in scc { + let sym = interner.intern(name); + let def_id = DefId::from_raw(def_names.len() as u32); + name_to_def.insert(sym, def_id); + def_names.push(sym); + } + } + + DependencyAnalysis { + sccs, + name_to_def, + def_names, + } } /// Validate recursion using the pre-computed dependency analysis. diff --git a/crates/plotnik-lib/src/query/dump.rs b/crates/plotnik-lib/src/query/dump.rs index b8f28dd8..6b5c5d17 100644 --- a/crates/plotnik-lib/src/query/dump.rs +++ b/crates/plotnik-lib/src/query/dump.rs @@ -36,5 +36,9 @@ mod test_helpers { pub fn dump_diagnostics_raw(&self) -> String { self.diagnostics().render(self.source_map()) } + + pub fn emit_typescript(&self) -> String { + crate::query::type_check::emit_typescript(self.type_context(), self.interner()) + } } } diff --git a/crates/plotnik-lib/src/query/expr_arity.rs b/crates/plotnik-lib/src/query/expr_arity.rs deleted file mode 100644 index cc6a4034..00000000 --- a/crates/plotnik-lib/src/query/expr_arity.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! Expression arity analysis for query expressions. -//! -//! Determines whether an expression matches a single node position (`One`) -//! or multiple sequential positions (`Many`). Used to validate field constraints: -//! `field: expr` requires `expr` to have `ExprArity::One`. -//! -//! `Invalid` marks nodes where arity cannot be determined (error nodes, -//! undefined refs, etc.). 
- -use std::collections::HashMap; - -use super::query::AstMap; -use super::source_map::SourceId; -use super::symbol_table::SymbolTable; -use super::visitor::{Visitor, walk_expr, walk_field_expr}; -use crate::diagnostics::{DiagnosticKind, Diagnostics}; -use crate::parser::{Expr, FieldExpr, Ref, SeqExpr, SyntaxKind, SyntaxNode, ast}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ExprArity { - One, - Many, - Invalid, -} - -pub type ExprArityTable = HashMap; - -pub fn infer_arities( - ast_map: &AstMap, - symbol_table: &SymbolTable, - diag: &mut Diagnostics, -) -> ExprArityTable { - let mut arity_table = ExprArityTable::default(); - - for (&source_id, root) in ast_map { - let ctx = ArityContext { - symbol_table, - arity_table, - diag, - source_id, - }; - let mut computer = ArityComputer { ctx }; - computer.visit(root); - arity_table = computer.ctx.arity_table; - } - - for (&source_id, root) in ast_map { - let ctx = ArityContext { - symbol_table, - arity_table, - diag, - source_id, - }; - let mut validator = ArityValidator { ctx }; - validator.visit(root); - arity_table = validator.ctx.arity_table; - } - - arity_table -} - -pub fn resolve_arity(node: &SyntaxNode, table: &ExprArityTable) -> Option { - if node.kind() == SyntaxKind::Error { - return Some(ExprArity::Invalid); - } - - // Try casting to Expr first as it's the most common query - if let Some(expr) = ast::Expr::cast(node.clone()) { - return table.get(&expr).copied(); - } - - // Root: arity based on definition count - if let Some(root) = ast::Root::cast(node.clone()) { - return Some(if root.defs().nth(1).is_some() { - ExprArity::Many - } else { - ExprArity::One - }); - } - - // Def: delegate to body's arity - if let Some(def) = ast::Def::cast(node.clone()) { - return def.body().and_then(|b| table.get(&b).copied()); - } - - // Branch: delegate to body's arity - if let Some(branch) = ast::Branch::cast(node.clone()) { - return branch.body().and_then(|b| table.get(&b).copied()); - } - - None -} - 
-struct ArityContext<'a, 'd> { - symbol_table: &'a SymbolTable, - arity_table: ExprArityTable, - diag: &'d mut Diagnostics, - source_id: SourceId, -} - -impl ArityContext<'_, '_> { - fn compute_arity(&mut self, expr: &Expr) -> ExprArity { - if let Some(&c) = self.arity_table.get(expr) { - return c; - } - // Insert sentinel to break cycles (e.g., `Foo = (Foo)`) - self.arity_table.insert(expr.clone(), ExprArity::Invalid); - - let c = self.compute_single_arity(expr); - self.arity_table.insert(expr.clone(), c); - c - } - - fn compute_single_arity(&mut self, expr: &Expr) -> ExprArity { - match expr { - Expr::NamedNode(_) | Expr::AnonymousNode(_) | Expr::FieldExpr(_) | Expr::AltExpr(_) => { - ExprArity::One - } - - Expr::SeqExpr(seq) => self.seq_arity(seq), - - Expr::CapturedExpr(cap) => cap - .inner() - .map(|inner| self.compute_arity(&inner)) - .unwrap_or(ExprArity::Invalid), - - Expr::QuantifiedExpr(q) => q - .inner() - .map(|inner| self.compute_arity(&inner)) - .unwrap_or(ExprArity::Invalid), - - Expr::Ref(r) => self.ref_arity(r), - } - } - - fn seq_arity(&mut self, seq: &SeqExpr) -> ExprArity { - // Avoid collecting into Vec; check if we have 0, 1, or >1 children. 
- let mut children = seq.children(); - - match children.next() { - None => ExprArity::One, - Some(first) => { - if children.next().is_some() { - ExprArity::Many - } else { - self.compute_arity(&first) - } - } - } - } - - fn ref_arity(&mut self, r: &Ref) -> ExprArity { - let name_tok = r.name().expect( - "expr_arities: Ref without name token \ - (parser only creates Ref for PascalCase Id)", - ); - let name = name_tok.text(); - - self.symbol_table - .get(name) - .map(|body| self.compute_arity(body)) - .unwrap_or(ExprArity::Invalid) - } - - fn validate_field(&mut self, field: &FieldExpr) { - let Some(value) = field.value() else { - return; - }; - - let card = self - .arity_table - .get(&value) - .copied() - .unwrap_or(ExprArity::One); - - if card == ExprArity::Many { - let field_name = field - .name() - .map(|t| t.text().to_string()) - .unwrap_or_else(|| "field".to_string()); - - let mut builder = self - .diag - .report( - self.source_id, - DiagnosticKind::FieldSequenceValue, - value.text_range(), - ) - .message(field_name); - - // If value is a reference, add related info pointing to definition - if let Expr::Ref(r) = &value - && let Some(name_tok) = r.name() - && let Some((def_source, def_body)) = self.symbol_table.get_full(name_tok.text()) - { - builder = builder.related_to(def_source, def_body.text_range(), "defined here"); - } - - builder.emit(); - } - } -} - -struct ArityComputer<'a, 'd> { - ctx: ArityContext<'a, 'd>, -} - -impl Visitor for ArityComputer<'_, '_> { - fn visit_expr(&mut self, expr: &Expr) { - self.ctx.compute_arity(expr); - walk_expr(self, expr); - } -} - -struct ArityValidator<'a, 'd> { - ctx: ArityContext<'a, 'd>, -} - -impl Visitor for ArityValidator<'_, '_> { - fn visit_field_expr(&mut self, field: &FieldExpr) { - self.ctx.validate_field(field); - walk_field_expr(self, field); - } -} diff --git a/crates/plotnik-lib/src/query/expr_arity_tests.rs b/crates/plotnik-lib/src/query/expr_arity_tests.rs deleted file mode 100644 index 687e330a..00000000 
--- a/crates/plotnik-lib/src/query/expr_arity_tests.rs +++ /dev/null @@ -1,511 +0,0 @@ -use crate::Query; -use indoc::indoc; - -#[test] -fn tree_is_one() { - let input = "Q = (identifier)"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ identifier - "); -} - -#[test] -fn singleton_seq_is_one() { - let input = "Q = {(identifier)}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Seq¹ - NamedNode¹ identifier - "); -} - -#[test] -fn nested_singleton_seq_is_one() { - let input = "Q = {{{(identifier)}}}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Seq¹ - Seq¹ - Seq¹ - NamedNode¹ identifier - "); -} - -#[test] -fn multi_seq_is_many() { - let input = "Q = {(a) (b)}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def⁺ Q - Seq⁺ - NamedNode¹ a - NamedNode¹ b - "); -} - -#[test] -fn alt_is_one() { - let input = "Q = [(a) (b)]"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Alt¹ - Branch¹ - NamedNode¹ a - Branch¹ - NamedNode¹ b - "); -} - -#[test] -fn alt_with_seq_branches() { - let input = indoc! {r#" - Q = [{(a) (b)} (c)] - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Alt¹ - Branch⁺ - Seq⁺ - NamedNode¹ a - NamedNode¹ b - Branch¹ - NamedNode¹ c - "); -} - -#[test] -fn ref_to_tree_is_one() { - let input = indoc! {r#" - X = (identifier) - Q = (call (X)) - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root⁺ - Def¹ X - NamedNode¹ identifier - Def¹ Q - NamedNode¹ call - Ref¹ X - "); -} - -#[test] -fn ref_to_seq_is_many() { - let input = indoc! 
{r#" - X = {(a) (b)} - Q = (call (X)) - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root⁺ - Def⁺ X - Seq⁺ - NamedNode¹ a - NamedNode¹ b - Def¹ Q - NamedNode¹ call - Ref⁺ X - "); -} - -#[test] -fn field_with_tree() { - let input = "Q = (call name: (identifier))"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ call - FieldExpr¹ name: - NamedNode¹ identifier - "); -} - -#[test] -fn field_with_alt() { - let input = "Q = (call name: [(identifier) (string)])"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ call - FieldExpr¹ name: - Alt¹ - Branch¹ - NamedNode¹ identifier - Branch¹ - NamedNode¹ string - "); -} - -#[test] -fn field_with_seq_error() { - let input = "Q = (call name: {(a) (b)})"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: field `name` must match exactly one node, not a sequence - | - 1 | Q = (call name: {(a) (b)}) - | ^^^^^^^^^ - "); -} - -#[test] -fn field_with_ref_to_seq_error() { - let input = indoc! 
{r#" - X = {(a) (b)} - Q = (call name: (X)) - "#}; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: field `name` must match exactly one node, not a sequence - | - 1 | X = {(a) (b)} - | --------- defined here - 2 | Q = (call name: (X)) - | ^^^ - "); -} - -#[test] -fn quantifier_preserves_inner_arity() { - let input = "Q = (identifier)*"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - QuantifiedExpr¹ * - NamedNode¹ identifier - "); -} - -#[test] -fn capture_preserves_inner_arity() { - let input = "Q = (identifier) @name"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - CapturedExpr¹ @name - NamedNode¹ identifier - "); -} - -#[test] -fn capture_on_seq() { - let input = "Q = {(a) (b)} @items"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def⁺ Q - CapturedExpr⁺ @items - Seq⁺ - NamedNode¹ a - NamedNode¹ b - "); -} - -#[test] -fn complex_nested_arities() { - let input = indoc! {r#" - Stmt = [(expr_stmt) (return_stmt)] - Q = (function_definition - name: (identifier) @name - body: (block (Stmt)* @stmts)) - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root⁺ - Def¹ Stmt - Alt¹ - Branch¹ - NamedNode¹ expr_stmt - Branch¹ - NamedNode¹ return_stmt - Def¹ Q - NamedNode¹ function_definition - CapturedExpr¹ @name - FieldExpr¹ name: - NamedNode¹ identifier - FieldExpr¹ body: - NamedNode¹ block - CapturedExpr¹ @stmts - QuantifiedExpr¹ * - Ref¹ Stmt - "); -} - -#[test] -fn tagged_alt_arities() { - let input = indoc! {r#" - Q = [Ident: (identifier) Num: (number)] - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Alt¹ - Branch¹ Ident: - NamedNode¹ identifier - Branch¹ Num: - NamedNode¹ number - "); -} - -#[test] -fn anchor_has_no_arity() { - let input = "Q = (block . 
(statement))"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ block - . - NamedNode¹ statement - "); -} - -#[test] -fn negated_field_has_no_arity() { - let input = "Q = (function !async)"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ function - NegatedField !async - "); -} - -#[test] -fn tree_with_wildcard_type() { - let input = "Q = (_)"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ (any) - "); -} - -#[test] -fn bare_wildcard_is_one() { - let input = "Q = _"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - AnonymousNode¹ (any) - "); -} - -#[test] -fn empty_seq_is_one() { - let input = "Q = {}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Seq¹ - "); -} - -#[test] -fn literal_is_one() { - let input = r#" - Q = "if" - "#; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r#" - Root¹ - Def¹ Q - AnonymousNode¹ "if" - "#); -} - -#[test] -fn invalid_error_node() { - let input = "Q = (foo %)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: unexpected token; not valid inside a node — try `(child)` or close with `)` - | - 1 | Q = (foo %) - | ^ - "); -} - -#[test] -fn invalid_undefined_ref() { - let input = "Q = (Undefined)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: `Undefined` is not defined - | - 1 | Q = (Undefined) - | ^^^^^^^^^ - "); -} - -#[test] -fn invalid_branch_without_body() { - let input = "Q = [A:]"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: expected an expression; after `Label:` - | - 1 | Q = [A:] - | ^ - "); -} - -#[test] -fn invalid_ref_to_bodyless_def() { - let 
input = indoc! {r#" - X = % - Q = (X) - "#}; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: expected an expression; after `=` in definition - | - 1 | X = % - | ^ - - error: `X` is not defined - | - 2 | Q = (X) - | ^ - "); -} - -#[test] -fn invalid_capture_without_inner() { - // Error recovery: `extra` is invalid, but `@y` still creates a Capture node - let input = "Q = (call extra @y)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: bare identifier is not a valid expression; wrap in parentheses: `(identifier)` - | - 1 | Q = (call extra @y) - | ^^^^^ - "); -} - -#[test] -fn invalid_capture_without_inner_standalone() { - // Standalone capture without preceding expression - let input = "Q = @x"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: expected an expression; after `=` in definition - | - 1 | Q = @x - | ^ - "); -} - -#[test] -fn invalid_multiple_captures_with_error() { - let input = "Q = (call (Undefined) @x extra @y)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: `Undefined` is not defined - | - 1 | Q = (call (Undefined) @x extra @y) - | ^^^^^^^^^ - - error: bare identifier is not a valid expression; wrap in parentheses: `(identifier)` - | - 1 | Q = (call (Undefined) @x extra @y) - | ^^^^^ - "); -} - -#[test] -fn invalid_quantifier_without_inner() { - // Error recovery: `extra` is invalid, but `*` still creates a Quantifier node - let input = "Q = (foo extra*)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: bare identifier is not a valid expression; wrap in parentheses: `(identifier)` - | - 1 | Q = (foo extra*) - | ^^^^^ - "); -} diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index cdd8358a..f259e05a 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -8,10 +8,19 
@@ use std::collections::HashMap; use indexmap::IndexSet; -use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; use plotnik_langs::Lang; use rowan::TextRange; +/// Output from the link phase for binary emission. +#[derive(Default)] +pub struct LinkOutput { + /// Interned name → NodeTypeId (for binary: StringId → NodeTypeId) + pub node_type_ids: HashMap, + /// Interned name → NodeFieldId (for binary: StringId → NodeFieldId) + pub node_field_ids: HashMap, +} + use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::parser::ast::{self, Expr, NamedNode}; use crate::parser::cst::{SyntaxKind, SyntaxToken}; @@ -28,22 +37,28 @@ use super::visitor::{Visitor, walk}; /// This function is decoupled from `Query` to allow easier testing and /// modularity. It orchestrates the resolution and validation phases. pub fn link<'q>( - ast_map: &AstMap, - source_map: &'q SourceMap, + interner: &mut Interner, lang: &Lang, + source_map: &'q SourceMap, + ast_map: &AstMap, symbol_table: &SymbolTable, - node_type_ids: &mut HashMap<&'q str, Option>, - node_field_ids: &mut HashMap<&'q str, Option>, + output: &mut LinkOutput, diagnostics: &mut Diagnostics, ) { + // Local deduplication maps (not exposed in output) + let mut node_type_ids: HashMap<&'q str, Option> = HashMap::new(); + let mut node_field_ids: HashMap<&'q str, Option> = HashMap::new(); + for (&source_id, root) in ast_map { let mut linker = Linker { - source_map, - source_id, + interner, lang, + source_map, symbol_table, - node_type_ids, - node_field_ids, + source_id, + node_type_ids: &mut node_type_ids, + node_field_ids: &mut node_field_ids, + output, diagnostics, }; linker.link(root); @@ -51,12 +66,15 @@ pub fn link<'q>( } struct Linker<'a, 'q> { - source_map: &'q SourceMap, - source_id: SourceId, + // Refs + interner: &'a mut Interner, lang: &'a Lang, + source_map: &'q SourceMap, symbol_table: &'a SymbolTable, + source_id: SourceId, node_type_ids: &'a mut 
HashMap<&'q str, Option>, node_field_ids: &'a mut HashMap<&'q str, Option>, + output: &'a mut LinkOutput, diagnostics: &'a mut Diagnostics, } @@ -96,6 +114,10 @@ impl<'a, 'q> Linker<'a, 'q> { let resolved = self.lang.resolve_named_node(type_name); self.node_type_ids .insert(token_src(&type_token, self.source()), resolved); + if let Some(id) = resolved { + let sym = self.interner.intern(type_name); + self.output.node_type_ids.entry(sym).or_insert(id); + } if resolved.is_none() { let all_types = self.lang.all_named_node_kinds(); let max_dist = (type_name.len() / 3).clamp(2, 4); @@ -133,7 +155,9 @@ impl<'a, 'q> Linker<'a, 'q> { let resolved = self.lang.resolve_field(field_name); self.node_field_ids .insert(token_src(&name_token, self.source()), resolved); - if resolved.is_some() { + if let Some(id) = resolved { + let sym = self.interner.intern(field_name); + self.output.node_field_ids.entry(sym).or_insert(id); return; } let all_fields = self.lang.all_field_names(); @@ -406,6 +430,10 @@ impl Visitor for NodeTypeCollector<'_, '_, '_> { self.linker .node_type_ids .insert(token_src(&value_token, self.linker.source()), resolved); + if let Some(id) = resolved { + let sym = self.linker.interner.intern(value); + self.linker.output.node_type_ids.entry(sym).or_insert(id); + } if resolved.is_none() { self.linker diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 1599dfb1..7b98cce9 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -319,10 +319,10 @@ fn wildcard_node_skips_validation() { #[test] fn def_reference_with_link() { + // Test linking with definition reference as scalar list (no internal captures) let input = indoc! 
{r#" - Func = (function_declaration - name: (identifier) @name) @fn - Q = (program (Func)+) + Func = (function_declaration) + Q = (program (Func)+ @funcs) "#}; Query::expect_valid_linking(input, &LANG); diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 78d4944e..3dccff31 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -10,19 +10,17 @@ pub use symbol_table::SymbolTable; pub mod alt_kinds; mod dependencies; -pub mod expr_arity; pub mod link; #[allow(clippy::module_inception)] pub mod query; pub mod symbol_table; +pub mod type_check; pub mod visitor; #[cfg(test)] mod alt_kinds_tests; #[cfg(test)] mod dependencies_tests; -#[cfg(test)] -mod expr_arity_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] @@ -31,3 +29,5 @@ mod printer_tests; mod query_tests; #[cfg(test)] mod symbol_table_tests; +#[cfg(test)] +mod type_check_tests; diff --git a/crates/plotnik-lib/src/query/printer.rs b/crates/plotnik-lib/src/query/printer.rs index 333abc5f..389e8722 100644 --- a/crates/plotnik-lib/src/query/printer.rs +++ b/crates/plotnik-lib/src/query/printer.rs @@ -8,8 +8,8 @@ use rowan::NodeOrToken; use crate::parser::{self as ast, Expr, SyntaxNode}; use super::Query; -use super::expr_arity::ExprArity; use super::source_map::SourceKind; +use super::type_check::Arity; pub struct QueryPrinter<'q> { query: &'q Query, @@ -385,9 +385,8 @@ impl<'q> QueryPrinter<'q> { return ""; } match self.query.get_arity(node) { - Some(ExprArity::One) => "¹", - Some(ExprArity::Many) => "⁺", - Some(ExprArity::Invalid) => "⁻", + Some(Arity::One) => "¹", + Some(Arity::Many) => "⁺", None => "ˣ", } } diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 30da1c61..5d72e6b1 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -4,17 +4,17 @@ use std::ops::{Deref, DerefMut}; use indexmap::IndexMap; -use 
plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; use plotnik_langs::Lang; use crate::Diagnostics; use crate::parser::{ParseResult, Parser, Root, SyntaxNode, lexer::lex}; use crate::query::alt_kinds::validate_alt_kinds; use crate::query::dependencies; -use crate::query::expr_arity::{ExprArity, ExprArityTable, infer_arities, resolve_arity}; use crate::query::link; use crate::query::source_map::{SourceId, SourceMap}; use crate::query::symbol_table::{SymbolTable, resolve_names}; +use crate::query::type_check::{self, Arity, TypeContext}; const DEFAULT_QUERY_PARSE_FUEL: u32 = 1_000_000; const DEFAULT_QUERY_PARSE_MAX_DEPTH: u32 = 4096; @@ -104,10 +104,13 @@ impl QueryParsed { impl QueryParsed { pub fn analyze(mut self) -> QueryAnalyzed { + // Create shared interner for all phases + let mut interner = Interner::new(); + // Use reference-based structures for processing let symbol_table = resolve_names(&self.source_map, &self.ast_map, &mut self.diag); - let dependency_analysis = dependencies::analyze_dependencies(&symbol_table); + let dependency_analysis = dependencies::analyze_dependencies(&symbol_table, &mut interner); dependencies::validate_recursion( &dependency_analysis, &self.ast_map, @@ -115,12 +118,20 @@ impl QueryParsed { &mut self.diag, ); - let arity_table = infer_arities(&self.ast_map, &symbol_table, &mut self.diag); + // Unified type checking pass + let type_context = type_check::infer_types( + &mut interner, + &self.ast_map, + &symbol_table, + &dependency_analysis, + &mut self.diag, + ); QueryAnalyzed { query_parsed: self, + interner, symbol_table, - arity_table, + type_context, } } @@ -141,8 +152,9 @@ pub type Query = QueryAnalyzed; pub struct QueryAnalyzed { query_parsed: QueryParsed, + interner: Interner, pub symbol_table: SymbolTable, - arity_table: ExprArityTable, + type_context: TypeContext, } impl QueryAnalyzed { @@ -150,39 +162,61 @@ impl QueryAnalyzed { !self.diag.has_errors() } - pub fn 
get_arity(&self, node: &SyntaxNode) -> Option { - resolve_arity(node, &self.arity_table) + pub fn get_arity(&self, node: &SyntaxNode) -> Option { + use crate::parser::ast::{self, Expr}; + + // Try casting to Expr first as it's the most common query + if let Some(expr) = ast::Expr::cast(node.clone()) { + return self.type_context.get_arity(&expr); + } + + // Root: arity based on definition count + if let Some(root) = ast::Root::cast(node.clone()) { + return Some(if root.defs().nth(1).is_some() { + Arity::Many + } else { + Arity::One + }); + } + + // Def: delegate to body's arity + if let Some(def) = ast::Def::cast(node.clone()) { + return def.body().and_then(|b| self.type_context.get_arity(&b)); + } + + // Branch: delegate to body's arity + if let Some(branch) = ast::Branch::cast(node.clone()) { + return branch.body().and_then(|b| self.type_context.get_arity(&b)); + } + + None + } + + pub fn type_context(&self) -> &TypeContext { + &self.type_context + } + + pub fn interner(&self) -> &Interner { + &self.interner } pub fn link(mut self, lang: &Lang) -> LinkedQuery { - // Use reference-based hash maps during processing - let mut type_ids: HashMap<&str, Option> = HashMap::new(); - let mut field_ids: HashMap<&str, Option> = HashMap::new(); + let mut output = link::LinkOutput::default(); link::link( - &self.query_parsed.ast_map, - &self.query_parsed.source_map, + &mut self.interner, lang, + &self.query_parsed.source_map, + &self.query_parsed.ast_map, &self.symbol_table, - &mut type_ids, - &mut field_ids, + &mut output, &mut self.query_parsed.diag, ); - // Convert to owned for storage - let type_ids_owned = type_ids - .into_iter() - .map(|(k, v)| (k.to_owned(), v)) - .collect(); - let field_ids_owned = field_ids - .into_iter() - .map(|(k, v)| (k.to_owned(), v)) - .collect(); - LinkedQuery { inner: self, - type_ids: type_ids_owned, - field_ids: field_ids_owned, + node_type_ids: output.node_type_ids, + node_field_ids: output.node_field_ids, } } } @@ -211,13 +245,24 @@ impl 
TryFrom<&str> for QueryAnalyzed { } } -type NodeTypeIdTableOwned = HashMap>; -type NodeFieldIdTableOwned = HashMap>; - pub struct LinkedQuery { inner: QueryAnalyzed, - type_ids: NodeTypeIdTableOwned, - field_ids: NodeFieldIdTableOwned, + node_type_ids: HashMap, + node_field_ids: HashMap, +} + +impl LinkedQuery { + pub fn interner(&self) -> &Interner { + &self.inner.interner + } + + pub fn node_type_ids(&self) -> &HashMap { + &self.node_type_ids + } + + pub fn node_field_ids(&self) -> &HashMap { + &self.node_field_ids + } } impl Deref for LinkedQuery { diff --git a/crates/plotnik-lib/src/query/query_tests.rs b/crates/plotnik-lib/src/query/query_tests.rs index 9aa44c11..4d3f3458 100644 --- a/crates/plotnik-lib/src/query/query_tests.rs +++ b/crates/plotnik-lib/src/query/query_tests.rs @@ -76,6 +76,18 @@ impl QueryAnalyzed { query.dump_diagnostics() } + #[track_caller] + pub fn expect_valid_types(src: &str) -> String { + let query = Self::parse_and_validate(src); + if !query.is_valid() { + panic!( + "Expected valid types, got error:\n{}", + query.dump_diagnostics() + ); + } + query.emit_typescript() + } + #[track_caller] pub fn expect_invalid(src: &str) -> String { let source_map = SourceMap::one_liner(src); diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs new file mode 100644 index 00000000..5f45ff94 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -0,0 +1,324 @@ +//! TypeContext: manages interned types, symbols, and term info cache. +//! +//! Types are interned to enable cheap equality checks and cycle handling. +//! Symbols are stored but resolved via external Interner reference. +//! TermInfo is cached per-expression to avoid recomputation. 
+ +use std::collections::{BTreeMap, HashMap, HashSet}; + +use crate::parser::ast::Expr; + +use super::symbol::{DefId, Interner, Symbol}; +use super::types::{ + Arity, FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind, +}; + +/// Central registry for types, symbols, and expression metadata. +#[derive(Debug, Clone)] +pub struct TypeContext { + types: Vec, + type_map: HashMap, + + def_names: Vec, + def_ids: HashMap, + /// Definition-level type info (for TypeScript emission), keyed by DefId + def_types: HashMap, + /// Definitions that are part of a recursive SCC + recursive_defs: HashSet, + + term_info: HashMap, +} + +impl Default for TypeContext { + fn default() -> Self { + Self::new() + } +} + +impl TypeContext { + pub fn new() -> Self { + let mut ctx = Self { + types: Vec::new(), + type_map: HashMap::new(), + def_names: Vec::new(), + def_ids: HashMap::new(), + def_types: HashMap::new(), + recursive_defs: HashSet::new(), + term_info: HashMap::new(), + }; + + // Pre-register builtin types at their expected IDs + let void_id = ctx.intern_type(TypeKind::Void); + debug_assert_eq!(void_id, TYPE_VOID); + + let node_id = ctx.intern_type(TypeKind::Node); + debug_assert_eq!(node_id, TYPE_NODE); + + let string_id = ctx.intern_type(TypeKind::String); + debug_assert_eq!(string_id, TYPE_STRING); + + ctx + } + + /// Seed definition mappings from DependencyAnalysis. + /// This avoids re-registering definitions that were already assigned DefIds. + pub fn seed_defs(&mut self, def_names: &[Symbol], name_to_def: &HashMap) { + self.def_names = def_names.to_vec(); + self.def_ids = name_to_def.clone(); + } + + /// Intern a type, returning its ID. Deduplicates identical types. + pub fn intern_type(&mut self, kind: TypeKind) -> TypeId { + if let Some(&id) = self.type_map.get(&kind) { + return id; + } + + let id = TypeId(self.types.len() as u32); + self.types.push(kind.clone()); + self.type_map.insert(kind, id); + id + } + + /// Get the TypeKind for a TypeId. 
+ pub fn get_type(&self, id: TypeId) -> Option<&TypeKind> { + self.types.get(id.0 as usize) + } + + /// Get or create a type, returning both the ID and a reference. + pub fn get_or_intern(&mut self, kind: TypeKind) -> (TypeId, &TypeKind) { + let id = self.intern_type(kind); + (id, &self.types[id.0 as usize]) + } + + /// Intern a struct type from fields. + pub fn intern_struct(&mut self, fields: BTreeMap) -> TypeId { + self.intern_type(TypeKind::Struct(fields)) + } + + /// Intern a struct type with a single field. + pub fn intern_single_field(&mut self, name: Symbol, info: FieldInfo) -> TypeId { + self.intern_type(TypeKind::Struct(BTreeMap::from([(name, info)]))) + } + + /// Get struct fields from a TypeId, if it points to a Struct. + pub fn get_struct_fields(&self, id: TypeId) -> Option<&BTreeMap> { + match self.get_type(id)? { + TypeKind::Struct(fields) => Some(fields), + _ => None, + } + } + + /// Cache term info for an expression. + pub fn set_term_info(&mut self, expr: Expr, info: TermInfo) { + self.term_info.insert(expr, info); + } + + /// Get cached term info for an expression. + pub fn get_term_info(&self, expr: &Expr) -> Option<&TermInfo> { + self.term_info.get(expr) + } + + /// Register a definition by name, returning its DefId. + pub fn register_def(&mut self, interner: &mut Interner, name: &str) -> DefId { + let sym = interner.intern(name); + self.register_def_sym(sym) + } + + /// Register a definition by pre-interned Symbol, returning its DefId. + pub fn register_def_sym(&mut self, sym: Symbol) -> DefId { + if let Some(&def_id) = self.def_ids.get(&sym) { + return def_id; + } + + let def_id = DefId::from_raw(self.def_names.len() as u32); + self.def_names.push(sym); + self.def_ids.insert(sym, def_id); + def_id + } + + /// Get DefId for a definition by Symbol. + pub fn get_def_id_sym(&self, sym: Symbol) -> Option { + self.def_ids.get(&sym).copied() + } + + /// Get DefId for a definition name (requires interner for lookup). 
+ pub fn get_def_id(&self, interner: &Interner, name: &str) -> Option { + // Linear scan - only used during analysis, not hot path. + // Necessary because we don't assume Interner has reverse lookup here. + self.def_ids + .iter() + .find_map(|(&sym, &id)| (interner.resolve(sym) == name).then_some(id)) + } + + /// Get the name Symbol for a DefId. + pub fn def_name_sym(&self, def_id: DefId) -> Symbol { + self.def_names[def_id.index()] + } + + /// Get the name string for a DefId. + pub fn def_name<'a>(&self, interner: &'a Interner, def_id: DefId) -> &'a str { + interner.resolve(self.def_names[def_id.index()]) + } + + /// Mark a definition as recursive. + pub fn mark_recursive(&mut self, def_id: DefId) { + self.recursive_defs.insert(def_id); + } + + /// Check if a definition is recursive. + pub fn is_recursive(&self, def_id: DefId) -> bool { + self.recursive_defs.contains(&def_id) + } + + /// Register the output type for a definition by DefId. + pub fn set_def_type(&mut self, def_id: DefId, type_id: TypeId) { + self.def_types.insert(def_id, type_id); + } + + /// Register the output type for a definition by string name. + /// Registers the def if not already known. + pub fn set_def_type_by_name(&mut self, interner: &mut Interner, name: &str, type_id: TypeId) { + let def_id = self.register_def(interner, name); + self.set_def_type(def_id, type_id); + } + + /// Get the output type for a definition by DefId. + pub fn get_def_type(&self, def_id: DefId) -> Option { + self.def_types.get(&def_id).copied() + } + + /// Get the output type for a definition by string name. + pub fn get_def_type_by_name(&self, interner: &Interner, name: &str) -> Option { + let id = self.get_def_id(interner, name)?; + self.get_def_type(id) + } + + /// Get arity for an expression. + pub fn get_arity(&self, expr: &Expr) -> Option { + self.term_info.get(expr).map(|info| info.arity) + } + + /// Iterate over all interned types. 
+ pub fn iter_types(&self) -> impl Iterator { + self.types + .iter() + .enumerate() + .map(|(i, k)| (TypeId(i as u32), k)) + } + + /// Number of interned types. + pub fn type_count(&self) -> usize { + self.types.len() + } + + /// Iterate over all definition types as (DefId, TypeId) in DefId order. + /// DefId order corresponds to SCC processing order (leaves first). + pub fn iter_def_types(&self) -> impl Iterator + '_ { + (0..self.def_names.len()).filter_map(|i| { + let def_id = DefId::from_raw(i as u32); + self.def_types + .get(&def_id) + .map(|&type_id| (def_id, type_id)) + }) + } + + /// Number of registered definitions. + pub fn def_count(&self) -> usize { + self.def_names.len() + } +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use super::*; + use crate::query::type_check::types::FieldInfo; + + #[test] + fn builtin_types_have_correct_ids() { + let ctx = TypeContext::new(); + + assert_eq!(ctx.get_type(TYPE_VOID), Some(&TypeKind::Void)); + assert_eq!(ctx.get_type(TYPE_NODE), Some(&TypeKind::Node)); + assert_eq!(ctx.get_type(TYPE_STRING), Some(&TypeKind::String)); + } + + #[test] + fn type_interning_deduplicates() { + let mut ctx = TypeContext::new(); + + let id1 = ctx.intern_type(TypeKind::Node); + let id2 = ctx.intern_type(TypeKind::Node); + + assert_eq!(id1, id2); + assert_eq!(id1, TYPE_NODE); + } + + #[test] + fn struct_types_intern_correctly() { + let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); + + let x_sym = interner.intern("x"); + let mut fields = BTreeMap::new(); + fields.insert(x_sym, FieldInfo::required(TYPE_NODE)); + + let id1 = ctx.intern_type(TypeKind::Struct(fields.clone())); + let id2 = ctx.intern_type(TypeKind::Struct(fields)); + + assert_eq!(id1, id2); + } + + #[test] + fn symbol_interning_works() { + let mut interner = Interner::new(); + + let a = interner.intern("foo"); + let b = interner.intern("foo"); + let c = interner.intern("bar"); + + assert_eq!(a, b); + assert_ne!(a, c); + 
assert_eq!(interner.resolve(a), "foo"); + assert_eq!(interner.resolve(c), "bar"); + } + + #[test] + fn def_type_by_name() { + let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); + + ctx.set_def_type_by_name(&mut interner, "Query", TYPE_NODE); + assert_eq!( + ctx.get_def_type_by_name(&interner, "Query"), + Some(TYPE_NODE) + ); + assert_eq!(ctx.get_def_type_by_name(&interner, "Missing"), None); + } + + #[test] + fn register_def_returns_stable_id() { + let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); + + let id1 = ctx.register_def(&mut interner, "Foo"); + let id2 = ctx.register_def(&mut interner, "Bar"); + let id3 = ctx.register_def(&mut interner, "Foo"); // duplicate + + assert_eq!(id1, id3); + assert_ne!(id1, id2); + assert_eq!(ctx.def_name(&interner, id1), "Foo"); + assert_eq!(ctx.def_name(&interner, id2), "Bar"); + } + + #[test] + fn def_id_lookup() { + let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); + + ctx.register_def(&mut interner, "Query"); + assert!(ctx.get_def_id(&interner, "Query").is_some()); + assert!(ctx.get_def_id(&interner, "Missing").is_none()); + } +} diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs new file mode 100644 index 00000000..3f30a4cb --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -0,0 +1,690 @@ +//! TypeScript type emitter for testing type inference. +//! +//! Converts inferred types to TypeScript declarations. +//! Used as a test oracle to verify type inference correctness. 
+ +use std::collections::hash_map::Entry; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; + +use plotnik_core::Interner; + +use super::context::TypeContext; +use super::symbol::Symbol; +use super::types::{FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeId, TypeKind}; + +/// Naming context for synthetic type names: (DefName, FieldName) +#[derive(Clone, Debug)] +struct NamingContext { + def_name: String, + field_name: Option, +} + +/// Configuration for TypeScript emission. +#[derive(Clone, Debug)] +pub struct EmitConfig { + /// Whether to export types + pub export: bool, + /// Whether to emit the Node type definition + pub emit_node_type: bool, + /// Name for the root type if unnamed + pub root_type_name: String, + /// Use verbose node representation (with kind, text, etc.) + pub verbose_nodes: bool, +} + +impl Default for EmitConfig { + fn default() -> Self { + Self { + export: true, + emit_node_type: true, + root_type_name: "Query".to_string(), + verbose_nodes: false, + } + } +} + +/// TypeScript emitter. +pub struct TsEmitter<'a> { + ctx: &'a TypeContext, + interner: &'a Interner, + config: EmitConfig, + + /// Generated type names, to avoid collisions + used_names: BTreeSet, + /// TypeId -> generated name mapping + type_names: HashMap, + + /// Track which builtin types are referenced + referenced_builtins: HashSet, + /// Track which types have been emitted + emitted: HashSet, + /// Output buffer + output: String, +} + +impl<'a> TsEmitter<'a> { + pub fn new(ctx: &'a TypeContext, interner: &'a Interner, config: EmitConfig) -> Self { + Self { + ctx, + interner, + config, + used_names: BTreeSet::new(), + type_names: HashMap::new(), + referenced_builtins: HashSet::new(), + emitted: HashSet::new(), + output: String::new(), + } + } + + /// Emit TypeScript for all definition types. 
+ pub fn emit(mut self) -> String { + self.prepare_emission(); + + // Collect all definitions, tracking primary name per TypeId and aliases + let mut primary_names: HashMap = HashMap::new(); + let mut aliases: Vec<(String, TypeId)> = Vec::new(); + + for (def_id, type_id) in self.ctx.iter_def_types() { + let name = self.ctx.def_name(self.interner, def_id).to_string(); + if let Entry::Vacant(e) = primary_names.entry(type_id) { + e.insert(name); + } else { + // This TypeId already has a primary definition; this becomes an alias + aliases.push((name, type_id)); + } + } + + // Collect all reachable types starting from definitions + let mut to_emit = HashSet::new(); + for (_, type_id) in self.ctx.iter_def_types() { + self.collect_reachable_types(type_id, &mut to_emit); + } + + // Emit in topological order + for type_id in self.sort_topologically(to_emit) { + if let Some(def_name) = primary_names.get(&type_id) { + self.emit_type_definition(def_name, type_id); + } else { + self.emit_generated_or_custom(type_id); + } + } + + // Emit type aliases for definitions that share a TypeId with another definition + for (alias_name, type_id) in aliases { + if let Some(primary_name) = primary_names.get(&type_id) { + self.emit_type_alias(&alias_name, primary_name); + } + } + + self.output + } + + /// Emit TypeScript for a single definition. 
+ pub fn emit_single(mut self, name: &str, type_id: TypeId) -> String { + self.prepare_emission(); + + let mut to_emit = HashSet::new(); + self.collect_reachable_types(type_id, &mut to_emit); + + let sorted = self.sort_topologically(to_emit); + + // Emit dependencies (everything except the root) + for &dep_id in &sorted { + if dep_id != type_id { + self.emit_generated_or_custom(dep_id); + } + } + + // Emit the main definition + self.emit_type_definition(name, type_id); + self.output + } + + fn prepare_emission(&mut self) { + self.assign_generated_names(); + self.collect_builtin_references(); + + if self.config.emit_node_type && self.referenced_builtins.contains(&TYPE_NODE) { + self.emit_node_interface(); + } + } + + fn assign_generated_names(&mut self) { + // 1. Reserve definition names to avoid collisions + for (def_id, _) in self.ctx.iter_def_types() { + let name = self.ctx.def_name(self.interner, def_id); + self.used_names.insert(to_pascal_case(name)); + } + + // 2. Collect naming contexts (path from definition to type) + let mut contexts = HashMap::new(); + for (def_id, type_id) in self.ctx.iter_def_types() { + let def_name = self.ctx.def_name(self.interner, def_id); + self.collect_naming_contexts( + type_id, + &NamingContext { + def_name: def_name.to_string(), + field_name: None, + }, + &mut contexts, + ); + } + + // 3. 
Assign names to types that need them + for (id, kind) in self.ctx.iter_types() { + if !self.needs_generated_name(kind) || self.type_names.contains_key(&id) { + continue; + } + + let name = if let Some(ctx) = contexts.get(&id) { + self.generate_contextual_name(ctx) + } else { + self.generate_fallback_name(kind) + }; + self.type_names.insert(id, name); + } + } + + fn collect_naming_contexts( + &self, + type_id: TypeId, + ctx: &NamingContext, + contexts: &mut HashMap, + ) { + if type_id.is_builtin() || contexts.contains_key(&type_id) { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Struct(fields) => { + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + for (&field_sym, info) in fields { + let field_name = self.interner.resolve(field_sym); + let field_ctx = NamingContext { + def_name: ctx.def_name.clone(), + field_name: Some(field_name.to_string()), + }; + self.collect_naming_contexts(info.type_id, &field_ctx, contexts); + } + } + TypeKind::Enum(_) => { + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + } + TypeKind::Array { element, .. 
} => { + self.collect_naming_contexts(*element, ctx, contexts); + } + TypeKind::Optional(inner) => { + self.collect_naming_contexts(*inner, ctx, contexts); + } + _ => {} + } + } + + fn collect_builtin_references(&mut self) { + for (_, type_id) in self.ctx.iter_def_types() { + self.collect_refs_recursive(type_id); + } + } + + fn collect_refs_recursive(&mut self, type_id: TypeId) { + if type_id == TYPE_NODE || type_id == TYPE_STRING { + self.referenced_builtins.insert(type_id); + return; + } + if type_id == TYPE_VOID { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Node | TypeKind::Custom(_) => { + self.referenced_builtins.insert(TYPE_NODE); + } + TypeKind::String => { + self.referenced_builtins.insert(TYPE_STRING); + } + TypeKind::Struct(fields) => { + fields + .values() + .for_each(|info| self.collect_refs_recursive(info.type_id)); + } + TypeKind::Enum(variants) => { + variants + .values() + .for_each(|&tid| self.collect_refs_recursive(tid)); + } + TypeKind::Array { element, .. 
} => self.collect_refs_recursive(*element), + TypeKind::Optional(inner) => self.collect_refs_recursive(*inner), + _ => {} + } + } + + fn sort_topologically(&self, types: HashSet) -> Vec { + let mut deps: HashMap> = HashMap::new(); + let mut rdeps: HashMap> = HashMap::new(); + + for &tid in &types { + deps.entry(tid).or_default(); + rdeps.entry(tid).or_default(); + } + + // Build dependency graph + for &tid in &types { + for dep in self.get_direct_deps(tid) { + if types.contains(&dep) && dep != tid { + deps.entry(tid).or_default().insert(dep); + rdeps.entry(dep).or_default().insert(tid); + } + } + } + + // Kahn's algorithm + let mut result = Vec::with_capacity(types.len()); + let mut queue: Vec = deps + .iter() + .filter(|(_, d)| d.is_empty()) + .map(|(&tid, _)| tid) + .collect(); + + // Sort for deterministic output + queue.sort_by_key(|tid| tid.0); + + while let Some(tid) = queue.pop() { + result.push(tid); + if let Some(dependents) = rdeps.get(&tid) { + for &dependent in dependents { + if let Some(dep_set) = deps.get_mut(&dependent) { + dep_set.remove(&tid); + if dep_set.is_empty() { + queue.push(dependent); + queue.sort_by_key(|t| t.0); + } + } + } + } + } + + result + } + + fn collect_reachable_types(&self, type_id: TypeId, out: &mut HashSet) { + if type_id.is_builtin() || out.contains(&type_id) { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Struct(fields) => { + out.insert(type_id); + for info in fields.values() { + self.collect_reachable_types(info.type_id, out); + } + } + TypeKind::Enum(_) | TypeKind::Custom(_) => { + out.insert(type_id); + } + TypeKind::Array { element, .. 
            } => self.collect_reachable_types(*element, out),
            TypeKind::Optional(inner) => self.collect_reachable_types(*inner, out),
            _ => {}
        }
    }

    /// Direct named-type dependencies of `type_id`: the structs, enums, and
    /// custom types it references (through arrays/optionals), used to order emission.
    fn get_direct_deps(&self, type_id: TypeId) -> Vec<TypeId> {
        let Some(kind) = self.ctx.get_type(type_id) else {
            return vec![];
        };
        match kind {
            TypeKind::Struct(fields) => fields
                .values()
                .flat_map(|info| self.unwrap_for_deps(info.type_id))
                .collect(),
            TypeKind::Enum(variants) => variants
                .values()
                .flat_map(|&tid| self.unwrap_for_deps(tid))
                .collect(),
            TypeKind::Array { element, .. } => self.unwrap_for_deps(*element),
            TypeKind::Optional(inner) => self.unwrap_for_deps(*inner),
            _ => vec![],
        }
    }

    /// Strip array/optional wrappers and return the underlying named type, if any.
    /// Builtins and leaf kinds (Void/Node/String/Ref) contribute no dependency.
    fn unwrap_for_deps(&self, type_id: TypeId) -> Vec<TypeId> {
        if type_id.is_builtin() {
            return vec![];
        }
        let Some(kind) = self.ctx.get_type(type_id) else {
            return vec![];
        };
        match kind {
            TypeKind::Array { element, .. } => self.unwrap_for_deps(*element),
            TypeKind::Optional(inner) => self.unwrap_for_deps(*inner),
            TypeKind::Struct(_) | TypeKind::Enum(_) | TypeKind::Custom(_) => vec![type_id],
            _ => vec![],
        }
    }

    /// Emit a type exactly once: either a generated named definition (when a
    /// name was assigned in `type_names`) or a `type X = Node` alias for customs.
    fn emit_generated_or_custom(&mut self, type_id: TypeId) {
        if self.emitted.contains(&type_id) || type_id.is_builtin() {
            return;
        }

        if let Some(name) = self.type_names.get(&type_id).cloned() {
            self.emit_generated_type_def(type_id, &name);
        } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(type_id) {
            self.emit_custom_type_alias(self.interner.resolve(*sym));
            self.emitted.insert(type_id);
        }
    }

    /// Emit the definition for a type that already has a generated name.
    // NOTE(review): `emitted` is marked before the kind check, so an unknown or
    // non-struct/enum kind is recorded as emitted without producing output —
    // presumably intentional (prevents retry loops); confirm.
    fn emit_generated_type_def(&mut self, type_id: TypeId, name: &str) {
        self.emitted.insert(type_id);
        let export = if self.config.export { "export " } else { "" };
        let Some(kind) = self.ctx.get_type(type_id) else {
            return;
        };

        match kind {
            TypeKind::Struct(fields) => self.emit_interface(name, fields, export),
            TypeKind::Enum(variants) => self.emit_tagged_union(name, variants, export),
            _ => {}
        }
    }

    /// Emit a top-level definition for `name` (pascal-cased). Unlike
    /// `emit_generated_type_def`, non-struct/enum kinds fall back to a `type` alias.
    fn emit_type_definition(&mut self, name: &str, type_id: TypeId) {
        self.emitted.insert(type_id);
        let export = if self.config.export { "export " } else { "" };
        let type_name = to_pascal_case(name);

        let Some(kind) = self.ctx.get_type(type_id) else {
            return;
        };

        match kind {
            TypeKind::Struct(fields) => {
                self.emit_interface(&type_name, fields, export);
            }
            TypeKind::Enum(variants) => {
                self.emit_tagged_union(&type_name, variants, export);
            }
            _ => {
                let ts_type = self.type_to_ts(type_id);
                self.output
                    .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type));
            }
        }
    }

    /// Emit a TypeScript `interface` with one member per struct field,
    /// sorted by resolved field name; optional fields get a `?` marker.
    fn emit_interface(&mut self, name: &str, fields: &BTreeMap<Symbol, FieldInfo>, export: &str) {
        self.output
            .push_str(&format!("{}interface {} {{\n", export, name));

        for (&sym, info) in self.sort_map_by_name(fields) {
            let field_name = self.interner.resolve(sym);
            let ts_type = self.type_to_ts(info.type_id);
            let optional = if info.optional { "?" } else { "" };
            self.output
                .push_str(&format!("  {}{}: {};\n", field_name, optional, ts_type));
        }

        self.output.push_str("}\n\n");
    }

    /// Emit a tagged union: one `interface NameVariant { $tag; $data }` per
    /// variant, then `type Name = V1 | V2 | …`.
    fn emit_tagged_union(&mut self, name: &str, variants: &BTreeMap<Symbol, TypeId>, export: &str) {
        let mut variant_types = Vec::new();

        for (&sym, &type_id) in variants {
            let variant_name = self.interner.resolve(sym);
            let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name));
            variant_types.push(variant_type_name.clone());

            let data_str = self.inline_data_type(type_id);
            self.output.push_str(&format!(
                "{}interface {} {{\n  $tag: \"{}\";\n  $data: {};\n}}\n\n",
                export, variant_type_name, variant_name, data_str
            ));
        }

        let union = variant_types.join(" | ");
        self.output
            .push_str(&format!("{}type {} = {};\n\n", export, name, union));
    }

    /// Custom (user-annotated) types alias to `Node` in the emitted output.
    fn emit_custom_type_alias(&mut self, name: &str) {
        let export = if self.config.export { "export " } else { "" };
        self.output
            .push_str(&format!("{}type {} = Node;\n\n", export, name));
    }

    /// Emit `type Alias = Target;`.
    fn emit_type_alias(&mut self, alias_name: &str, target_name: &str) {
        let export = if self.config.export { "export " } else { "" };
        self.output.push_str(&format!(
            "{}type {} = {};\n\n",
            export, alias_name, target_name
        ));
    }

    /// Emit the `Node` interface; `verbose_nodes` adds start/end positions.
    fn emit_node_interface(&mut self) {
        let export = if self.config.export { "export " } else { "" };
        if self.config.verbose_nodes {
            self.output.push_str(&format!(
                "{}interface Node {{\n  kind: string;\n  text: string;\n  startPosition: {{ row: number; column: number }};\n  endPosition: {{ row: number; column: number }};\n}}\n\n",
                export
            ));
        } else {
            self.output.push_str(&format!(
                "{}interface Node {{\n  kind: string;\n  text: string;\n}}\n\n",
                export
            ));
        }
    }

    /// Render a type reference as TypeScript source. Named struct/enum types
    /// use their generated name; unnamed ones are inlined.
    fn type_to_ts(&self, type_id: TypeId) -> String {
        // Fast path for the three builtin TypeIds (avoids a context lookup).
        match type_id {
            TYPE_VOID => return "void".to_string(),
            TYPE_NODE => return "Node".to_string(),
            TYPE_STRING => return "string".to_string(),
            _ => {}
        }

        let Some(kind) = self.ctx.get_type(type_id) else {
            return "unknown".to_string();
        };

        match kind {
            TypeKind::Void => "void".to_string(),
            TypeKind::Node => "Node".to_string(),
            TypeKind::String => "string".to_string(),
            TypeKind::Custom(sym) => self.interner.resolve(*sym).to_string(),
            TypeKind::Ref(def_id) => to_pascal_case(self.ctx.def_name(self.interner, *def_id)),

            TypeKind::Struct(fields) => {
                if let Some(name) = self.type_names.get(&type_id) {
                    name.clone()
                } else {
                    self.inline_struct(fields)
                }
            }
            TypeKind::Enum(variants) => {
                if let Some(name) = self.type_names.get(&type_id) {
                    name.clone()
                } else {
                    self.inline_enum(variants)
                }
            }
            TypeKind::Array { element, non_empty } => {
                let elem_type = self.type_to_ts(*element);
                if *non_empty {
                    // Non-empty arrays become a tuple-with-rest: [T, ...T[]].
                    format!("[{}, ...{}[]]", elem_type, elem_type)
                } else {
                    format!("{}[]", elem_type)
                }
            }
            TypeKind::Optional(inner) => format!("{} | null", self.type_to_ts(*inner)),
        }
    }

    /// Render an anonymous struct inline as `{ a: T; b: U }`.
    fn inline_struct(&self, fields: &BTreeMap<Symbol, FieldInfo>) -> String {
        if fields.is_empty() {
            return "{}".to_string();
        }

        let
        field_strs: Vec<_> = self
            .sort_map_by_name(fields)
            .into_iter()
            .map(|(&sym, info)| {
                let name = self.interner.resolve(sym);
                let ts_type = self.type_to_ts(info.type_id);
                let optional = if info.optional { "?" } else { "" };
                format!("{}{}: {}", name, optional, ts_type)
            })
            .collect();

        format!("{{ {} }}", field_strs.join("; "))
    }

    /// Render an anonymous tagged union inline as
    /// `{ $tag: "a"; $data: T } | { $tag: "b"; $data: U }`.
    fn inline_enum(&self, variants: &BTreeMap<Symbol, TypeId>) -> String {
        let variant_strs: Vec<_> = self
            .sort_map_by_name(variants)
            .into_iter()
            .map(|(&sym, &type_id)| {
                let name = self.interner.resolve(sym);
                let data_type = self.type_to_ts(type_id);
                format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type)
            })
            .collect();

        variant_strs.join(" | ")
    }

    /// Render a variant's `$data` payload: structs inline, Void becomes `{}`.
    fn inline_data_type(&self, type_id: TypeId) -> String {
        let Some(kind) = self.ctx.get_type(type_id) else {
            return "unknown".to_string();
        };

        match kind {
            TypeKind::Struct(fields) => self.inline_struct(fields),
            TypeKind::Void => "{}".to_string(),
            _ => self.type_to_ts(type_id),
        }
    }

    /// Only struct and enum kinds receive generated top-level names.
    fn needs_generated_name(&self, kind: &TypeKind) -> bool {
        matches!(kind, TypeKind::Struct(_) | TypeKind::Enum(_))
    }

    /// Build a name from the naming context: `DefName` or `DefNameFieldName`,
    /// de-duplicated via `unique_name`.
    fn generate_contextual_name(&mut self, ctx: &NamingContext) -> String {
        let base = if let Some(field) = &ctx.field_name {
            format!("{}{}", to_pascal_case(&ctx.def_name), to_pascal_case(field))
        } else {
            to_pascal_case(&ctx.def_name)
        };
        self.unique_name(&base)
    }

    /// Last-resort name when no context is available ("Struct"/"Enum"/"Type").
    fn generate_fallback_name(&mut self, kind: &TypeKind) -> String {
        let base = match kind {
            TypeKind::Struct(_) => "Struct",
            TypeKind::Enum(_) => "Enum",
            _ => "Type",
        };
        self.unique_name(base)
    }

    /// Reserve `base` in `used_names`, appending a numeric suffix (2, 3, …)
    /// on collision until a free name is found.
    fn unique_name(&mut self, base: &str) -> String {
        let base = to_pascal_case(base);
        if self.used_names.insert(base.clone()) {
            return base;
        }

        let mut counter = 2;
        loop {
            let name = format!("{}{}", base, counter);
            if self.used_names.insert(name.clone()) {
                return name;
            }
            counter += 1;
        }
    }

    /// Helper to iterate map sorted by resolved symbol name.
    fn sort_map_by_name<'b, T>(&self, map: &'b BTreeMap<Symbol, T>) -> Vec<(&'b Symbol, &'b T)> {
        let mut items: Vec<_> = map.iter().collect();
        items.sort_by_key(|&(&sym, _)| self.interner.resolve(sym));
        items
    }
}

/// Convert `snake_case`/`kebab-case`/`dotted.names` to PascalCase.
/// Separators (`_`, `-`, `.`) are dropped; already-Pascal input is preserved.
fn to_pascal_case(s: &str) -> String {
    let mut result = String::with_capacity(s.len());
    let mut capitalize_next = true;

    for c in s.chars() {
        if c == '_' || c == '-' || c == '.' {
            capitalize_next = true;
        } else if capitalize_next {
            // extend() because to_uppercase() may yield multiple chars.
            result.extend(c.to_uppercase());
            capitalize_next = false;
        } else {
            result.push(c);
        }
    }
    result
}

/// Emit TypeScript with default configuration.
pub fn emit_typescript(ctx: &TypeContext, interner: &Interner) -> String {
    TsEmitter::new(ctx, interner, EmitConfig::default()).emit()
}

/// Emit TypeScript with an explicit [`EmitConfig`].
pub fn emit_typescript_with_config(
    ctx: &TypeContext,
    interner: &Interner,
    config: EmitConfig,
) -> String {
    TsEmitter::new(ctx, interner, config).emit()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn to_pascal_case_works() {
        assert_eq!(to_pascal_case("foo"), "Foo");
        assert_eq!(to_pascal_case("foo_bar"), "FooBar");
        assert_eq!(to_pascal_case("foo-bar"), "FooBar");
        assert_eq!(to_pascal_case("_"), "");
        assert_eq!(to_pascal_case("FooBar"), "FooBar");
    }

    #[test]
    fn emit_node_type_only_when_referenced() {
        // Empty context - Node should not be emitted
        let ctx = TypeContext::new();
        let interner = Interner::new();
        let output = TsEmitter::new(&ctx, &interner, EmitConfig::default()).emit();
        assert!(!output.contains("interface Node"));

        // Context with a definition using Node - should emit Node
        let mut ctx = TypeContext::new();
        let mut interner = Interner::new();
        let x_sym = interner.intern("x");
        let mut fields = BTreeMap::new();
        fields.insert(x_sym, FieldInfo::required(TYPE_NODE));
        let struct_id = ctx.intern_type(TypeKind::Struct(fields));
        ctx.set_def_type_by_name(&mut interner, "Q", struct_id);

        let output = TsEmitter::new(&ctx, &interner, EmitConfig::default()).emit();
        assert!(output.contains("interface Node"));
        assert!(output.contains("kind: string"));
    }
}
diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs
new file mode 100644
index 00000000..e1390440
--- /dev/null
+++ b/crates/plotnik-lib/src/query/type_check/infer.rs
@@ -0,0 +1,614 @@
//! Bottom-up type inference visitor.
//!
//! Traverses the AST and computes TermInfo (Arity + TypeFlow) for each expression.
//! Reports diagnostics for type errors like strict dimensionality violations.

use std::collections::BTreeMap;
use std::collections::btree_map::Entry;

use plotnik_core::Interner;
use rowan::TextRange;

use super::context::TypeContext;
use super::symbol::Symbol;
use super::types::{
    Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TermInfo, TypeFlow, TypeId, TypeKind,
};
use super::unify::{UnifyError, unify_flows};

use crate::diagnostics::{DiagnosticKind, Diagnostics};
use crate::parser::ast::{
    AltExpr, AltKind, AnonymousNode, CapturedExpr, Def, Expr, FieldExpr, NamedNode, QuantifiedExpr,
    Ref, Root, SeqExpr,
};
use crate::parser::cst::SyntaxKind;
use crate::query::source_map::SourceId;
use crate::query::symbol_table::SymbolTable;
use crate::query::visitor::{Visitor, walk_alt_expr, walk_def, walk_named_node, walk_seq_expr};

/// Inference context for a single pass over the AST.
pub struct InferenceVisitor<'a, 'd> {
    pub ctx: &'a mut TypeContext,
    pub interner: &'a mut Interner,
    pub symbol_table: &'a SymbolTable,
    pub source_id: SourceId,
    pub diag: &'d mut Diagnostics,
}

impl<'a, 'd> InferenceVisitor<'a, 'd> {
    pub fn new(
        ctx: &'a mut TypeContext,
        interner: &'a mut Interner,
        symbol_table: &'a SymbolTable,
        source_id: SourceId,
        diag: &'d mut Diagnostics,
    ) -> Self {
        Self {
            ctx,
            interner,
            symbol_table,
            source_id,
            diag,
        }
    }

    /// Infer the TermInfo for an expression, caching the result.
    pub fn infer_expr(&mut self, expr: &Expr) -> TermInfo {
        if let Some(info) = self.ctx.get_term_info(expr) {
            return info.clone();
        }

        // Sentinel to break recursion cycles: a cyclic re-entry sees void()
        // instead of recursing forever; overwritten with the real result below.
        self.ctx.set_term_info(expr.clone(), TermInfo::void());

        let info = self.compute_expr(expr);
        self.ctx.set_term_info(expr.clone(), info.clone());
        info
    }

    /// Dispatch to the per-variant inference method (uncached).
    fn compute_expr(&mut self, expr: &Expr) -> TermInfo {
        match expr {
            Expr::NamedNode(n) => self.infer_named_node(n),
            Expr::AnonymousNode(n) => self.infer_anonymous_node(n),
            Expr::Ref(r) => self.infer_ref(r),
            Expr::SeqExpr(s) => self.infer_seq_expr(s),
            Expr::AltExpr(a) => self.infer_alt_expr(a),
            Expr::CapturedExpr(c) => self.infer_captured_expr(c),
            Expr::QuantifiedExpr(q) => self.infer_quantified_expr(q),
            Expr::FieldExpr(f) => self.infer_field_expr(f),
        }
    }

    /// Named node: matches one position, bubbles up child captures.
    fn infer_named_node(&mut self, node: &NamedNode) -> TermInfo {
        let mut merged_fields: BTreeMap<Symbol, FieldInfo> = BTreeMap::new();

        for child in node.children() {
            let child_info = self.infer_expr(&child);

            if let TypeFlow::Bubble(type_id) = child_info.flow
                && let Some(fields) = self.ctx.get_struct_fields(type_id)
            {
                for (name, info) in fields {
                    // Named nodes merge fields silently (union behavior):
                    // on duplicate capture names the first occurrence wins.
                    merged_fields.entry(*name).or_insert(*info);
                }
            }
        }

        let flow = if merged_fields.is_empty() {
            TypeFlow::Void
        } else {
            TypeFlow::Bubble(self.ctx.intern_struct(merged_fields))
        };

        TermInfo::new(Arity::One, flow)
    }

    /// Anonymous node (literal or wildcard): matches one position, produces nothing.
    fn infer_anonymous_node(&mut self, _node: &AnonymousNode) -> TermInfo {
        TermInfo::new(Arity::One, TypeFlow::Void)
    }

    /// Reference: transparent for non-recursive defs, opaque boundary for recursive ones.
    fn infer_ref(&mut self, r: &Ref) -> TermInfo {
        let Some(name_tok) = r.name() else {
            // Malformed ref (no name token): recover with void.
            return TermInfo::void();
        };
        let name = name_tok.text();
        let name_sym = self.interner.intern(name);

        // Recursive refs are opaque boundaries - they match but don't bubble captures.
        // The Ref type is created when a recursive ref is captured (in infer_captured_expr).
        if let Some(def_id) = self.ctx.get_def_id_sym(name_sym)
            && self.ctx.is_recursive(def_id)
        {
            return TermInfo::new(Arity::One, TypeFlow::Void);
        }

        let Some(body) = self.symbol_table.get(name) else {
            // Unresolved name: recover with void (resolution errors are
            // presumably reported elsewhere — confirm).
            return TermInfo::void();
        };

        // Non-recursive refs are transparent
        self.infer_expr(body)
    }

    /// Sequence: Arity aggregation and strict field merging (no duplicates).
    fn infer_seq_expr(&mut self, seq: &SeqExpr) -> TermInfo {
        let children: Vec<_> = seq.children().collect();

        // 0 or 1 children: take the child's arity (or One for an empty seq);
        // 2+ children always occupy many positions.
        let arity = match children.len() {
            0 | 1 => children
                .first()
                .map(|c| self.infer_expr(c).arity)
                .unwrap_or(Arity::One),
            _ => Arity::Many,
        };

        let mut merged_fields: BTreeMap<Symbol, FieldInfo> = BTreeMap::new();

        for child in &children {
            let child_info = self.infer_expr(child);

            if let TypeFlow::Bubble(type_id) = child_info.flow {
                // Clone fields to release immutable borrow on self.ctx,
                // allowing mutable borrow of self for merge_seq_fields.
                if let Some(fields) = self.ctx.get_struct_fields(type_id).cloned() {
                    self.merge_seq_fields(&mut merged_fields, &fields, child.text_range());
                }
            }
        }

        let flow = if merged_fields.is_empty() {
            TypeFlow::Void
        } else {
            TypeFlow::Bubble(self.ctx.intern_struct(merged_fields))
        };

        TermInfo::new(arity, flow)
    }

    /// Merge `source` fields into `target`; duplicate capture names are a
    /// diagnostic (unlike named-node merging, which keeps the first silently).
    fn merge_seq_fields(
        &mut self,
        target: &mut BTreeMap<Symbol, FieldInfo>,
        source: &BTreeMap<Symbol, FieldInfo>,
        range: TextRange,
    ) {
        for (&name, &info) in source {
            match target.entry(name) {
                Entry::Vacant(e) => {
                    e.insert(info);
                }
                Entry::Occupied(_) => {
                    self.diag
                        .report(
                            self.source_id,
                            DiagnosticKind::DuplicateCaptureInScope,
                            range,
                        )
                        .message(self.interner.resolve(name))
                        .emit();
                }
            }
        }
    }

    /// Alternation: arity is Many if ANY branch is Many.
    fn infer_alt_expr(&mut self, alt: &AltExpr) -> TermInfo {
        match alt.kind() {
            AltKind::Tagged => self.infer_tagged_alt(alt),
            AltKind::Untagged | AltKind::Mixed => self.infer_untagged_alt(alt),
        }
    }

    /// Tagged alternation: each labeled branch becomes an enum variant.
    fn infer_tagged_alt(&mut self, alt: &AltExpr) -> TermInfo {
        let mut variants: BTreeMap<Symbol, TypeId> = BTreeMap::new();
        let mut combined_arity = Arity::One;

        for branch in alt.branches() {
            let Some(label) = branch.label() else {
                // Unlabeled branch inside a tagged alt: skipped here
                // (presumably rejected by an earlier validation — confirm).
                continue;
            };
            let label_sym = self.interner.intern(label.text());

            let Some(body) = branch.body() else {
                // Empty variant -> empty struct
                let empty_struct = self.ctx.intern_struct(BTreeMap::new());
                variants.insert(label_sym, empty_struct);
                continue;
            };

            let body_info = self.infer_expr(&body);
            combined_arity = combined_arity.combine(body_info.arity);
            variants.insert(label_sym, self.flow_to_type(&body_info.flow));
        }

        // Scalar flow: the enum is a value, a scope boundary — it does not bubble.
        let enum_type = self.ctx.intern_type(TypeKind::Enum(variants));
        TermInfo::new(combined_arity, TypeFlow::Scalar(enum_type))
    }

    /// Untagged alternation: branch flows are unified into a single flow.
    fn infer_untagged_alt(&mut self, alt: &AltExpr) -> TermInfo {
        let mut flows: Vec<TypeFlow> = Vec::new();
        let mut combined_arity = Arity::One;

        // Collect from branches
        for branch in
        alt.branches() {
            if let Some(body) = branch.body() {
                let info = self.infer_expr(&body);
                combined_arity = combined_arity.combine(info.arity);
                flows.push(info.flow);
            }
        }

        // Collect from direct expressions
        for expr in alt.exprs() {
            let info = self.infer_expr(&expr);
            combined_arity = combined_arity.combine(info.arity);
            flows.push(info.flow);
        }

        let unified_flow = match unify_flows(self.ctx, flows) {
            Ok(flow) => flow,
            Err(err) => {
                self.report_unify_error(alt.text_range(), &err);
                TypeFlow::Void
            }
        };

        TermInfo::new(combined_arity, unified_flow)
    }

    /// Captured expression: wraps inner's flow into a field.
    fn infer_captured_expr(&mut self, cap: &CapturedExpr) -> TermInfo {
        let Some(name_tok) = cap.name() else {
            // Recover gracefully
            return cap
                .inner()
                .map(|i| self.infer_expr(&i))
                .unwrap_or_else(TermInfo::void);
        };
        let capture_name = self.interner.intern(name_tok.text());

        let annotation_type = self.resolve_annotation(cap);
        let Some(inner) = cap.inner() else {
            // Capture without inner -> creates a Node field
            let type_id = annotation_type.unwrap_or(TYPE_NODE);
            let field = FieldInfo::required(type_id);
            return TermInfo::new(
                Arity::One,
                TypeFlow::Bubble(self.ctx.intern_single_field(capture_name, field)),
            );
        };

        // Determine how inner flow relates to capture (e.g. ? makes field optional)
        let (inner_info, is_optional) = self.resolve_capture_inner(&inner);

        let captured_type = self.determine_captured_type(&inner, &inner_info, annotation_type);
        let field_info = if is_optional {
            FieldInfo::optional(captured_type)
        } else {
            FieldInfo::required(captured_type)
        };

        TermInfo::new(
            inner_info.arity,
            TypeFlow::Bubble(self.ctx.intern_single_field(capture_name, field_info)),
        )
    }

    /// Resolves explicit type annotation like `@foo: string`.
    /// `string` maps to the builtin; any other name becomes a Custom type.
    fn resolve_annotation(&mut self, cap: &CapturedExpr) -> Option<TypeId> {
        cap.type_annotation().and_then(|t| {
            t.name().map(|n| {
                let text = n.text();
                if text == "string" {
                    TYPE_STRING
                } else {
                    let sym = self.interner.intern(text);
                    self.ctx.intern_type(TypeKind::Custom(sym))
                }
            })
        })
    }

    /// Logic for how quantifier on the inner expression affects the capture field.
    /// Returns (Info, is_optional).
    fn resolve_capture_inner(&mut self, inner: &Expr) -> (TermInfo, bool) {
        if let Expr::QuantifiedExpr(q) = inner {
            let quantifier = self.parse_quantifier(q);
            match quantifier {
                // * or + acts as row capture here (skipping strict dimensionality)
                QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => {
                    (self.infer_quantified_expr_as_row(q), false)
                }
                // ? makes the resulting capture field optional
                QuantifierKind::Optional => (self.infer_expr(inner), true),
            }
        } else {
            (self.infer_expr(inner), false)
        }
    }

    /// Transforms the inner flow into a specific TypeId for the field.
    /// An explicit annotation always wins; Void falls back to Node (or a
    /// recursive Ref type when the inner is a recursive reference).
    fn determine_captured_type(
        &mut self,
        inner: &Expr,
        inner_info: &TermInfo,
        annotation: Option<TypeId>,
    ) -> TypeId {
        match &inner_info.flow {
            TypeFlow::Void => {
                if let Some(ref_type) = self.get_recursive_ref_type(inner) {
                    annotation.unwrap_or(ref_type)
                } else {
                    annotation.unwrap_or(TYPE_NODE)
                }
            }
            TypeFlow::Scalar(type_id) => annotation.unwrap_or(*type_id),
            TypeFlow::Bubble(type_id) => annotation.unwrap_or(*type_id),
        }
    }

    /// If expr is (or contains) a recursive Ref, return its Ref type.
    fn get_recursive_ref_type(&mut self, expr: &Expr) -> Option<TypeId> {
        match expr {
            Expr::Ref(r) => {
                let name_tok = r.name()?;
                let name = name_tok.text();
                let sym = self.interner.intern(name);
                let def_id = self.ctx.get_def_id_sym(sym)?;
                if self.ctx.is_recursive(def_id) {
                    Some(self.ctx.intern_type(TypeKind::Ref(def_id)))
                } else {
                    None
                }
            }
            // Look through wrappers that may contain the ref.
            Expr::QuantifiedExpr(q) => self.get_recursive_ref_type(&q.inner()?),
            Expr::CapturedExpr(c) => self.get_recursive_ref_type(&c.inner()?),
            Expr::FieldExpr(f) => self.get_recursive_ref_type(&f.value()?),
            _ => None,
        }
    }

    /// Quantifier in normal position: strict dimensionality is enforced.
    fn infer_quantified_expr(&mut self, quant: &QuantifiedExpr) -> TermInfo {
        self.infer_quantified_expr_impl(quant, false)
    }

    /// Quantifier directly under a capture (row capture): dimensionality
    /// checking is skipped because the capture collects the rows.
    fn infer_quantified_expr_as_row(&mut self, quant: &QuantifiedExpr) -> TermInfo {
        self.infer_quantified_expr_impl(quant, true)
    }

    fn infer_quantified_expr_impl(
        &mut self,
        quant: &QuantifiedExpr,
        is_row_capture: bool,
    ) -> TermInfo {
        let Some(inner) = quant.inner() else {
            return TermInfo::void();
        };

        let inner_info = self.infer_expr(&inner);
        let quantifier = self.parse_quantifier(quant);

        let flow = match quantifier {
            QuantifierKind::Optional => self.make_flow_optional(inner_info.flow),
            QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => {
                if !is_row_capture {
                    self.check_strict_dimensionality(quant, &inner_info);
                }
                self.make_flow_array(inner_info.flow, &inner, quantifier.is_non_empty())
            }
        };

        TermInfo::new(inner_info.arity, flow)
    }

    /// Apply `?` to a flow: scalars get an Optional wrapper; bubbled struct
    /// fields all become optional (the struct may not have matched at all).
    fn make_flow_optional(&mut self, flow: TypeFlow) -> TypeFlow {
        match flow {
            TypeFlow::Void => TypeFlow::Void,
            TypeFlow::Scalar(t) => TypeFlow::Scalar(self.ctx.intern_type(TypeKind::Optional(t))),
            TypeFlow::Bubble(type_id) => {
                let fields = self
                    .ctx
                    .get_struct_fields(type_id)
                    .cloned()
                    .unwrap_or_default();
                let optional_fields = fields
                    .into_iter()
                    .map(|(k, v)| (k, v.make_optional()))
                    .collect();
                TypeFlow::Bubble(self.ctx.intern_struct(optional_fields))
            }
        }
    }

    /// Apply `*`/`+` to a flow, producing a Scalar array flow.
    fn make_flow_array(&mut self, flow: TypeFlow, inner: &Expr, non_empty: bool) -> TypeFlow {
        match flow {
            TypeFlow::Void => {
                // Scalar list: void inner -> array of Node (or Ref)
                let element = self.get_recursive_ref_type(inner).unwrap_or(TYPE_NODE);
                let array_type = self.ctx.intern_type(TypeKind::Array { element, non_empty });
                TypeFlow::Scalar(array_type)
            }
            TypeFlow::Scalar(t) => {
                let array_type = self.ctx.intern_type(TypeKind::Array {
                    element: t,
                    non_empty,
                });
                TypeFlow::Scalar(array_type)
            }
            TypeFlow::Bubble(struct_type) => {
                // Note: Bubble with * or + is strictly invalid unless it's a row capture,
                // but we construct a valid type as fallback.
                let array_type = self.ctx.intern_type(TypeKind::Array {
                    element: struct_type,
                    non_empty,
                });
                TypeFlow::Scalar(array_type)
            }
        }
    }

    /// Field expression: arity One, delegates type to value.
    fn infer_field_expr(&mut self, field: &FieldExpr) -> TermInfo {
        let Some(value) = field.value() else {
            return TermInfo::void();
        };

        let value_info = self.infer_expr(&value);

        // Validation: Fields cannot be assigned 'Many' arity values directly
        if value_info.arity == Arity::Many {
            self.report_field_arity_error(field, &value);
        }

        TermInfo::new(Arity::One, value_info.flow)
    }

    /// Report `field: <many-position value>`; when the value is a Ref, attach
    /// a "defined here" note pointing at the referenced definition's body.
    fn report_field_arity_error(&mut self, field: &FieldExpr, value: &Expr) {
        let field_name = field
            .name()
            .map(|t| t.text().to_string())
            .unwrap_or_else(|| "field".to_string());

        let mut builder = self.diag.report(
            self.source_id,
            DiagnosticKind::FieldSequenceValue,
            value.text_range(),
        );
        builder = builder.message(field_name);

        if let Expr::Ref(r) = value
            && let Some(name_tok) = r.name()
        {
            let name = name_tok.text();
            if let Some((src, body)) = self.symbol_table.get_full(name) {
                builder = builder.related_to(src, body.text_range(), "defined here");
            }
        }

        builder.emit();
    }

    ///
    /// Check strict dimensionality rule for * and + quantifiers.
    /// Captures inside a quantifier are forbidden unless marked as a row capture.
    fn check_strict_dimensionality(&mut self, quant: &QuantifiedExpr, inner_info: &TermInfo) {
        let TypeFlow::Bubble(type_id) = &inner_info.flow else {
            return;
        };

        let Some(fields) = self.ctx.get_struct_fields(*type_id) else {
            return;
        };
        if fields.is_empty() {
            return;
        }

        let op = quant
            .operator()
            .map(|t| t.text().to_string())
            .unwrap_or_else(|| "*".to_string());

        let capture_names: Vec<_> = fields
            .keys()
            .map(|s| format!("`@{}`", self.interner.resolve(*s)))
            .collect();
        let captures_str = capture_names.join(", ");

        self.diag
            .report(
                self.source_id,
                DiagnosticKind::StrictDimensionalityViolation,
                quant.text_range(),
            )
            .message(format!(
                "quantifier `{}` contains captures ({}) but no row capture",
                op, captures_str
            ))
            .hint("wrap as `{...}* @rows`")
            .emit();
    }

    /// Map the operator token to a QuantifierKind; the `?`-suffixed (lazy)
    /// variants map to the same kind as their greedy counterparts.
    fn parse_quantifier(&self, quant: &QuantifiedExpr) -> QuantifierKind {
        let Some(op) = quant.operator() else {
            // Missing operator token: default to ZeroOrMore.
            return QuantifierKind::ZeroOrMore;
        };

        match op.kind() {
            SyntaxKind::Question | SyntaxKind::QuestionQuestion => QuantifierKind::Optional,
            SyntaxKind::Star | SyntaxKind::StarQuestion => QuantifierKind::ZeroOrMore,
            SyntaxKind::Plus | SyntaxKind::PlusQuestion => QuantifierKind::OneOrMore,
            _ => QuantifierKind::ZeroOrMore,
        }
    }

    /// Collapse a flow to a concrete TypeId; Void becomes the empty struct.
    fn flow_to_type(&mut self, flow: &TypeFlow) -> TypeId {
        match flow {
            TypeFlow::Void => self.ctx.intern_struct(BTreeMap::new()),
            TypeFlow::Scalar(t) | TypeFlow::Bubble(t) => *t,
        }
    }

    /// Translate a UnifyError into the corresponding diagnostic.
    fn report_unify_error(&mut self, range: TextRange, err: &UnifyError) {
        let (kind, msg) = match err {
            UnifyError::ScalarInUntagged => (
                DiagnosticKind::IncompatibleTypes,
                "scalar type in untagged alternation; use tagged alternation instead".to_string(),
            ),
            UnifyError::IncompatibleTypes { field } => (
                DiagnosticKind::IncompatibleCaptureTypes,
                self.interner.resolve(*field).to_string(),
            ),
            UnifyError::IncompatibleStructs { field } => (
                DiagnosticKind::IncompatibleStructShapes,
                self.interner.resolve(*field).to_string(),
            ),
            UnifyError::IncompatibleArrayElements { field } => (
                DiagnosticKind::IncompatibleCaptureTypes,
                self.interner.resolve(*field).to_string(),
            ),
        };

        self.diag
            .report(self.source_id, kind, range)
            .message(msg)
            .emit();
    }
}

impl Visitor for InferenceVisitor<'_, '_> {
    fn visit_def(&mut self, def: &Def) {
        walk_def(self, def);
    }

    fn visit_expr(&mut self, expr: &Expr) {
        self.infer_expr(expr);
    }

    fn visit_named_node(&mut self, node: &NamedNode) {
        // Bottom-up traversal
        walk_named_node(self, node);
    }

    fn visit_seq_expr(&mut self, seq: &SeqExpr) {
        walk_seq_expr(self, seq);
    }

    fn visit_alt_expr(&mut self, alt: &AltExpr) {
        walk_alt_expr(self, alt);
    }
}

/// Run inference on all definitions in a root.
pub fn infer_root(
    ctx: &mut TypeContext,
    interner: &mut Interner,
    symbol_table: &SymbolTable,
    source_id: SourceId,
    root: &Root,
    diag: &mut Diagnostics,
) {
    let mut visitor = InferenceVisitor::new(ctx, interner, symbol_table, source_id, diag);
    visitor.visit(root);
}
diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs
new file mode 100644
index 00000000..74cc553b
--- /dev/null
+++ b/crates/plotnik-lib/src/query/type_check/mod.rs
@@ -0,0 +1,190 @@
//! Unified type checking pass.
//!
//! Computes both structural arity (for field validation) and data flow types
//! (for TypeScript emission) in a single traversal.
mod context;
mod emit_ts;
mod infer;
mod symbol;
mod types;
mod unify;

pub use context::TypeContext;
pub use emit_ts::{EmitConfig, TsEmitter, emit_typescript, emit_typescript_with_config};
pub use symbol::{DefId, Interner, Symbol};
pub use types::{
    Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeFlow,
    TypeId, TypeKind,
};
pub use unify::{UnifyError, unify_flow, unify_flows};

use std::collections::BTreeMap;

use indexmap::IndexMap;

use crate::diagnostics::Diagnostics;
use crate::parser::ast::{self, Root};
use crate::query::dependencies::DependencyAnalysis;
use crate::query::source_map::SourceId;
use crate::query::symbol_table::{SymbolTable, UNNAMED_DEF};

use infer::infer_root;

/// Run type inference on all definitions.
///
/// Processes definitions in dependency order (leaves first) to handle
/// recursive definitions correctly.
pub fn infer_types(
    interner: &mut Interner,
    ast_map: &IndexMap<SourceId, Root>,
    symbol_table: &SymbolTable,
    dependency_analysis: &DependencyAnalysis,
    diag: &mut Diagnostics,
) -> TypeContext {
    let ctx = TypeContext::new();
    InferencePass {
        ctx,
        interner,
        ast_map,
        symbol_table,
        dependency_analysis,
        diag,
    }
    .run()
}

/// State for one complete inference run over all definitions.
struct InferencePass<'a> {
    ctx: TypeContext,
    interner: &'a mut Interner,
    ast_map: &'a IndexMap<SourceId, Root>,
    symbol_table: &'a SymbolTable,
    dependency_analysis: &'a DependencyAnalysis,
    diag: &'a mut Diagnostics,
}

impl<'a> InferencePass<'a> {
    fn run(mut self) -> TypeContext {
        // Avoid re-registration of definitions
        self.ctx.seed_defs(
            self.dependency_analysis.def_names(),
            self.dependency_analysis.name_to_def(),
        );

        self.mark_recursion();
        self.process_sccs();
        self.process_orphans();

        self.ctx
    }

    /// Identify and mark recursive definitions.
    /// A def is recursive if it's in an SCC with >1 member, or it references itself directly.
    fn mark_recursion(&mut self) {
        for scc in &self.dependency_analysis.sccs {
            if self.is_scc_recursive(scc) {
                for def_name in scc {
                    let sym = self.interner.intern(def_name);
                    if let Some(def_id) = self.ctx.get_def_id_sym(sym) {
                        self.ctx.mark_recursive(def_id);
                    }
                }
            }
        }
    }

    /// Process definitions in SCC order (leaves first).
    fn process_sccs(&mut self) {
        for scc in &self.dependency_analysis.sccs {
            for def_name in scc {
                if let Some(source_id) = self.symbol_table.source_id(def_name) {
                    self.infer_and_register(def_name, source_id);
                }
            }
        }
    }

    /// Handle any definitions not in an SCC (safety net).
    fn process_orphans(&mut self) {
        for (name, source_id, _body) in self.symbol_table.iter_full() {
            // Skip if already processed
            if self.ctx.get_def_type_by_name(self.interner, name).is_some() {
                continue;
            }
            self.infer_and_register(name, source_id);
        }
    }

    /// Infer the def's source and record its output type in the context.
    fn infer_and_register(&mut self, def_name: &str, source_id: SourceId) {
        let Some(root) = self.ast_map.get(&source_id) else {
            return;
        };

        infer_root(
            &mut self.ctx,
            self.interner,
            self.symbol_table,
            source_id,
            root,
            self.diag,
        );

        // Register the definition's output type based on the inferred body flow
        if let Some(body) = self.symbol_table.get(def_name)
            && let Some(info) = self.ctx.get_term_info(body).cloned()
        {
            let type_id = self.flow_to_type_id(&info.flow);
            self.ctx
                .set_def_type_by_name(self.interner, def_name, type_id);
        }
    }

    /// An SCC is recursive if it has multiple members, or its single member's
    /// body references itself by name.
    fn is_scc_recursive(&self, scc: &[String]) -> bool {
        if scc.len() > 1 {
            return true;
        }

        let Some(name) = scc.first() else {
            return false;
        };

        let Some(body) = self.symbol_table.get(name) else {
            return false;
        };

        body_references_self(body, name)
    }

    /// Collapse a flow to a TypeId; Void becomes the empty struct.
    fn flow_to_type_id(&mut self, flow: &TypeFlow) -> TypeId {
        match flow {
            TypeFlow::Void => self.ctx.intern_struct(BTreeMap::new()),
            TypeFlow::Scalar(id) | TypeFlow::Bubble(id) => *id,
        }
    }
}

/// Check if an expression body
/// contains a reference to the given name.
fn body_references_self(body: &ast::Expr, name: &str) -> bool {
    // Scan every CST descendant for a Ref node whose name matches.
    body.as_cst().descendants().any(|descendant| {
        let Some(r) = ast::Ref::cast(descendant) else {
            return false;
        };

        let Some(name_tok) = r.name() else {
            return false;
        };

        name_tok.text() == name
    })
}

/// Get the primary definition name (first non-underscore, or underscore if none).
pub fn primary_def_name(symbol_table: &SymbolTable) -> &str {
    for name in symbol_table.keys() {
        if name != UNNAMED_DEF {
            return name;
        }
    }

    UNNAMED_DEF
}
diff --git a/crates/plotnik-lib/src/query/type_check/symbol.rs b/crates/plotnik-lib/src/query/type_check/symbol.rs
new file mode 100644
index 00000000..5fbeeddf
--- /dev/null
+++ b/crates/plotnik-lib/src/query/type_check/symbol.rs
@@ -0,0 +1,46 @@
pub use plotnik_core::{Interner, Symbol};

/// A lightweight handle to a named definition.
///
/// Assigned during dependency analysis.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct DefId(u32);

impl DefId {
    /// Construct from a raw index (no validation; caller supplies a valid index).
    #[inline]
    pub fn from_raw(index: u32) -> Self {
        Self(index)
    }

    /// The raw u32 value.
    #[inline]
    pub fn as_u32(self) -> u32 {
        self.0
    }

    /// The value as a usize, for indexing into per-def tables.
    #[inline]
    pub fn index(self) -> usize {
        self.0 as usize
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn def_id_roundtrip() {
        let id = DefId::from_raw(42);
        assert_eq!(id.as_u32(), 42);
        assert_eq!(id.index(), 42);
    }

    #[test]
    fn def_id_equality() {
        let a = DefId::from_raw(1);
        let b = DefId::from_raw(1);
        let c = DefId::from_raw(2);

        assert_eq!(a, b);
        assert_ne!(a, c);
    }
}
diff --git a/crates/plotnik-lib/src/query/type_check/types.rs b/crates/plotnik-lib/src/query/type_check/types.rs
new file mode 100644
index 00000000..cb724937
--- /dev/null
+++ b/crates/plotnik-lib/src/query/type_check/types.rs
@@ -0,0 +1,200 @@
//! Core type definitions for the type checking pass.
//!
//! The type system tracks two orthogonal properties:
//!
//! - Arity: Whether an expression matches one or many node positions.
//! - TypeFlow: What data flows through an expression.

use std::collections::BTreeMap;

use super::symbol::{DefId, Symbol};

/// Interned type identifier.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)]
pub struct TypeId(pub u32);

// The three builtin ids occupy the first slots of the interner.
pub const TYPE_VOID: TypeId = TypeId(0);
pub const TYPE_NODE: TypeId = TypeId(1);
pub const TYPE_STRING: TypeId = TypeId(2);

impl TypeId {
    /// True for the reserved Void/Node/String ids.
    pub fn is_builtin(self) -> bool {
        self.0 <= TYPE_STRING.0
    }
}

/// The kind of a type, determining its structure.
#[derive(Clone, PartialEq, Eq, Hash, Debug)]
pub enum TypeKind {
    /// Produces nothing, transparent to parent scope.
    Void,
    /// A tree-sitter node.
    Node,
    /// Extracted text from a node.
    String,
    /// User-specified type via `@x :: TypeName`.
    Custom(Symbol),
    /// Object with named fields.
    Struct(BTreeMap<Symbol, FieldInfo>),
    /// Tagged union from labeled alternations.
    Enum(BTreeMap<Symbol, TypeId>),
    /// Array type with element type.
    Array { element: TypeId, non_empty: bool },
    /// Optional wrapper.
    Optional(TypeId),
    /// Forward reference to a recursive type.
    Ref(DefId),
}

impl TypeKind {
    pub fn is_void(&self) -> bool {
        matches!(self, Self::Void)
    }

    // NOTE(review): "scalar" here means simply "not Void" — it includes
    // Struct/Enum/Array kinds, unlike TypeFlow::is_scalar; confirm intended.
    pub fn is_scalar(&self) -> bool {
        !self.is_void()
    }
}

/// Field information within a struct type.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct FieldInfo {
    pub type_id: TypeId,
    // Whether the field may be absent (e.g. introduced under `?`).
    pub optional: bool,
}

impl FieldInfo {
    /// A field that is always present.
    pub fn required(type_id: TypeId) -> Self {
        Self {
            type_id,
            optional: false,
        }
    }

    /// A field that may be absent.
    pub fn optional(type_id: TypeId) -> Self {
        Self {
            type_id,
            optional: true,
        }
    }

    /// Copy of this field with `optional` forced on.
    pub fn make_optional(self) -> Self {
        Self {
            optional: true,
            ..self
        }
    }
}

/// Structural arity - whether an expression matches one or many positions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Arity {
    /// Exactly one node position.
    One,
    /// Multiple sequential positions.
    Many,
}

impl Arity {
    /// Combine arities: Many wins.
    pub fn combine(self, other: Self) -> Self {
        if self == Self::One && other == Self::One {
            return Self::One;
        }
        Self::Many
    }
}

/// Data flow through an expression.
#[derive(Clone, Debug)]
pub enum TypeFlow {
    /// Transparent, produces nothing.
    Void,
    /// Opaque single value that doesn't bubble (scope boundary).
    Scalar(TypeId),
    /// Struct type whose fields bubble to parent scope.
    Bubble(TypeId),
}

impl TypeFlow {
    pub fn is_void(&self) -> bool {
        matches!(self, Self::Void)
    }

    pub fn is_scalar(&self) -> bool {
        matches!(self, Self::Scalar(_))
    }

    pub fn is_bubble(&self) -> bool {
        matches!(self, Self::Bubble(_))
    }

    /// The carried TypeId, if any (None for Void).
    pub fn type_id(&self) -> Option<TypeId> {
        match self {
            Self::Void => None,
            Self::Scalar(id) | Self::Bubble(id) => Some(*id),
        }
    }
}

/// Combined arity and type flow information for an expression.
#[derive(Clone, Debug)]
pub struct TermInfo {
    pub arity: Arity,
    pub flow: TypeFlow,
}

impl TermInfo {
    pub fn new(arity: Arity, flow: TypeFlow) -> Self {
        Self { arity, flow }
    }

    /// One position, no data.
    pub fn void() -> Self {
        Self {
            arity: Arity::One,
            flow: TypeFlow::Void,
        }
    }

    // NOTE(review): identical to `void()` — presumably intentional (a bare
    // node matches one position and produces no data flow); confirm.
    pub fn node() -> Self {
        Self {
            arity: Arity::One,
            flow: TypeFlow::Void,
        }
    }

    pub fn scalar(arity: Arity, type_id: TypeId) -> Self {
        Self {
            arity,
            flow: TypeFlow::Scalar(type_id),
        }
    }

    pub fn bubble(arity: Arity, struct_type_id: TypeId) -> Self {
        Self {
            arity,
            flow: TypeFlow::Bubble(struct_type_id),
        }
    }
}

/// Quantifier kind for type inference.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum QuantifierKind {
    /// `?` or `??` - zero or one.
    Optional,
    /// `*` or `*?` - zero or more.
    ZeroOrMore,
    /// `+` or `+?` - one or more.
    OneOrMore,
}

impl QuantifierKind {
    /// Whether this quantifier requires strict dimensionality (row capture).
+ pub fn requires_row_capture(self) -> bool { + matches!(self, Self::ZeroOrMore | Self::OneOrMore) + } + + pub fn is_non_empty(self) -> bool { + matches!(self, Self::OneOrMore) + } +} diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs new file mode 100644 index 00000000..f44014b5 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -0,0 +1,208 @@ +//! Unification logic for alternation branches. +//! +//! Handles merging TypeFlow from different branches of untagged alternations. +//! Tagged alternations don't unify—they produce Enum types directly. + +use std::collections::BTreeMap; + +use super::context::TypeContext; +use super::symbol::Symbol; +use super::types::{FieldInfo, TYPE_VOID, TypeFlow, TypeId}; + +/// Error during type unification. +#[derive(Clone, Debug)] +pub enum UnifyError { + /// Scalar type appeared in untagged alternation (needs tagging) + ScalarInUntagged, + /// Capture has incompatible types across branches + IncompatibleTypes { field: Symbol }, + /// Capture has incompatible struct shapes across branches + IncompatibleStructs { field: Symbol }, + /// Array element types don't match + IncompatibleArrayElements { field: Symbol }, +} + +impl UnifyError { + pub fn field_symbol(&self) -> Option { + match self { + UnifyError::ScalarInUntagged => None, + UnifyError::IncompatibleTypes { field } + | UnifyError::IncompatibleStructs { field } + | UnifyError::IncompatibleArrayElements { field } => Some(*field), + } + } +} + +/// Unify multiple flows from alternation branches. +pub fn unify_flows( + ctx: &mut TypeContext, + flows: impl IntoIterator, +) -> Result { + let mut iter = flows.into_iter(); + let Some(first) = iter.next() else { + return Ok(TypeFlow::Void); + }; + + iter.try_fold(first, |acc, flow| unify_flow(ctx, acc, flow)) +} + +/// Unify two TypeFlows from alternation branches. 
+/// +/// Rules: +/// - Void ∪ Void → Void +/// - Void ∪ Bubble(s) → Bubble(make_all_optional(s)) +/// - Bubble(a) ∪ Bubble(b) → Bubble(merge_fields(a, b)) +/// - Scalar in untagged → Error +pub fn unify_flow(ctx: &mut TypeContext, a: TypeFlow, b: TypeFlow) -> Result { + // Untagged alternations cannot contain scalars. + if matches!(a, TypeFlow::Scalar(_)) || matches!(b, TypeFlow::Scalar(_)) { + return Err(UnifyError::ScalarInUntagged); + } + + match (a, b) { + (TypeFlow::Void, TypeFlow::Void) => Ok(TypeFlow::Void), + + // Void ∪ Bubble -> Bubble (all fields become optional) + (TypeFlow::Void, TypeFlow::Bubble(id)) | (TypeFlow::Bubble(id), TypeFlow::Void) => { + let fields = ctx.get_struct_fields(id).cloned().unwrap_or_default(); + let optional_fields = make_all_optional(fields); + Ok(TypeFlow::Bubble(ctx.intern_struct(optional_fields))) + } + + (TypeFlow::Bubble(a_id), TypeFlow::Bubble(b_id)) => { + let a_fields = ctx.get_struct_fields(a_id).cloned().unwrap_or_default(); + let b_fields = ctx.get_struct_fields(b_id).cloned().unwrap_or_default(); + + let merged = merge_fields(a_fields, b_fields)?; + Ok(TypeFlow::Bubble(ctx.intern_struct(merged))) + } + + // Should be unreachable due to initial scalar check, but technically possible if new variants are added + _ => Err(UnifyError::ScalarInUntagged), + } +} + +/// Make all fields in a map optional. +fn make_all_optional(fields: BTreeMap) -> BTreeMap { + fields + .into_iter() + .map(|(k, v)| (k, v.make_optional())) + .collect() +} + +/// Merge two field maps. +/// +/// Rules: +/// - Keys in both: types must be compatible, field is required iff required in both. +/// - Keys in only one: field becomes optional. +fn merge_fields( + a: BTreeMap, + mut b: BTreeMap, +) -> Result, UnifyError> { + let mut result = BTreeMap::new(); + + // Process all keys from 'a'. Check intersection with 'b'. 
+ for (key, a_info) in a { + if let Some(b_info) = b.remove(&key) { + // Key exists in both: unify types + let type_id = unify_type_ids(a_info.type_id, b_info.type_id, key)?; + let optional = a_info.optional || b_info.optional; + result.insert(key, FieldInfo { type_id, optional }); + } else { + // Key only in 'a': make optional + result.insert(key, a_info.make_optional()); + } + } + + // Remaining keys in 'b' were not in 'a': make optional + result.extend(make_all_optional(b)); + + Ok(result) +} + +/// Unify two type IDs. +/// +/// For now, types must match exactly (except Node is compatible with Node). +fn unify_type_ids(a: TypeId, b: TypeId, field: Symbol) -> Result { + if a == b { + return Ok(a); + } + + // Void is compatible with anything (treat as identity) + if a == TYPE_VOID { + return Ok(b); + } + if b == TYPE_VOID { + return Ok(a); + } + + // Type mismatch + Err(UnifyError::IncompatibleTypes { field }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::query::type_check::TYPE_NODE; + use plotnik_core::Interner; + + #[test] + fn unify_void_void() { + let mut ctx = TypeContext::new(); + let result = unify_flow(&mut ctx, TypeFlow::Void, TypeFlow::Void); + assert!(matches!(result, Ok(TypeFlow::Void))); + } + + #[test] + fn unify_void_bubble() { + let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); + let x = interner.intern("x"); + let struct_id = ctx.intern_single_field(x, FieldInfo::required(TYPE_NODE)); + + let result = unify_flow(&mut ctx, TypeFlow::Void, TypeFlow::Bubble(struct_id)).unwrap(); + + match result { + TypeFlow::Bubble(id) => { + let fields = ctx.get_struct_fields(id).unwrap(); + assert!(fields.get(&x).unwrap().optional); + } + _ => panic!("expected Bubble"), + } + } + + #[test] + fn unify_bubble_merge() { + let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); + let x = interner.intern("x"); + let y = interner.intern("y"); + + let a_id = ctx.intern_single_field(x, FieldInfo::required(TYPE_NODE)); 
+ + let mut b_fields = BTreeMap::new(); + b_fields.insert(x, FieldInfo::required(TYPE_NODE)); + b_fields.insert(y, FieldInfo::required(TYPE_NODE)); + let b_id = ctx.intern_struct(b_fields); + + let result = unify_flow(&mut ctx, TypeFlow::Bubble(a_id), TypeFlow::Bubble(b_id)).unwrap(); + + match result { + TypeFlow::Bubble(id) => { + let fields = ctx.get_struct_fields(id).unwrap(); + // x is in both, so required + assert!(!fields.get(&x).unwrap().optional); + // y only in b, so optional + assert!(fields.get(&y).unwrap().optional); + } + _ => panic!("expected Bubble"), + } + } + + #[test] + fn unify_scalar_error() { + let mut ctx = TypeContext::new(); + let result = unify_flow(&mut ctx, TypeFlow::Scalar(TYPE_NODE), TypeFlow::Void); + assert!(matches!(result, Err(UnifyError::ScalarInUntagged))); + } +} diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs new file mode 100644 index 00000000..c1d8bab8 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -0,0 +1,656 @@ +use crate::Query; +use indoc::indoc; + +#[test] +fn multiple_definitions_all_emitted() { + let input = indoc! {r#" + Id = (identifier) @id + Foo = (function_declaration name: (Id)) + Bar = (class_declaration name: (Id)) + "#}; + + let res = Query::expect_valid_types(input); + + // All three definitions emitted: Id as primary, Foo and Bar as aliases + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Id { + id: Node; + } + + export type Foo = Id; + + export type Bar = Id; + "); +} + +#[test] +fn multiple_definitions_distinct_types() { + let input = indoc! 
{r#" + Name = (identifier) @name + Value = (number) @value + Both = (pair (identifier) @key (number) @val) + "#}; + + let res = Query::expect_valid_types(input); + + // All three definitions emitted with their own types + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Both { + key: Node; + val: Node; + } + + export interface Value { + value: Node; + } + + export interface Name { + name: Node; + } + "); +} + +#[test] +fn capture_single_node() { + let input = "Q = (identifier) @name"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: Node; + } + "); +} + +#[test] +fn capture_with_string_annotation() { + let input = "Q = (identifier) @name :: string"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Q { + name: string; + } + "); +} + +#[test] +fn capture_with_custom_type() { + let input = "Q = (identifier) @name :: Identifier"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export type Identifier = Node; + + export interface Q { + name: Identifier; + } + "); +} + +#[test] +fn named_node_with_field_capture() { + let input = "Q = (function name: (identifier) @name)"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: Node; + } + "); +} + +#[test] +fn named_node_multiple_field_captures() { + let input = indoc! 
{r#" + Q = (function + name: (identifier) @name + body: (block) @body + ) + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + body: Node; + name: Node; + } + "); +} + +#[test] +fn nested_named_node_captures() { + let input = indoc! {r#" + Q = (call + function: (member target: (identifier) @target) + ) + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + target: Node; + } + "); +} + +#[test] +fn scalar_list_zero_or_more() { + let input = "Q = (decorator)* @decorators"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + decorators: Node[]; + } + "); +} + +#[test] +fn scalar_list_one_or_more() { + let input = "Q = (identifier)+ @names"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + names: [Node, ...Node[]]; + } + "); +} + +#[test] +fn row_list_basic() { + let input = indoc! {r#" + Q = {(key) @k (value) @v}* @rows + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface QRows { + k: Node; + v: Node; + } + + export interface Q { + rows: QRows[]; + } + "); +} + +#[test] +fn row_list_non_empty() { + let input = indoc! 
{r#" + Q = {(key) @k (value) @v}+ @rows + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface QRows { + k: Node; + v: Node; + } + + export interface Q { + rows: [QRows, ...QRows[]]; + } + "); +} + +#[test] +fn optional_single_capture() { + let input = "Q = (decorator)? @dec"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + dec?: Node; + } + "); +} + +#[test] +fn optional_group_bubbles_fields() { + let input = indoc! {r#" + Q = {(modifier) @mod (decorator) @dec}? + "#}; + + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + dec?: Node; + mod?: Node; + } + "); +} + +#[test] +fn sequence_merges_fields() { + let input = indoc! {r#" + Q = {(a) @a (b) @b} + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + a: Node; + b: Node; + } + "); +} + +#[test] +fn captured_sequence_creates_struct() { + let input = indoc! 
{r#" + Q = {(a) @a (b) @b} @row + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface QRow { + a: Node; + b: Node; + } + + export interface Q { + row: QRow; + } + "); +} + +#[test] +fn untagged_alt_same_capture_all_branches() { + let input = "Q = [(a) @x (b) @x]"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + x: Node; + } + "); +} + +#[test] +fn untagged_alt_different_captures() { + let input = "Q = [(a) @a (b) @b]"; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + a?: Node; + b?: Node; + } + "); +} + +#[test] +fn untagged_alt_partial_overlap() { + let input = indoc! {r#" + Q = [ + {(a) @x (b) @y} + {(a) @x} + ] + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + x: Node; + y?: Node; + } + "); +} + +#[test] +fn tagged_alt_basic() { + let input = indoc! {r#" + Q = [ + Str: (string) @s + Num: (number) @n + ] + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r#" + export interface Node { + kind: string; + text: string; + } + + export interface QStr { + $tag: "Str"; + $data: { s: Node }; + } + + export interface QNum { + $tag: "Num"; + $data: { n: Node }; + } + + export type Q = QStr | QNum; + "#); +} + +#[test] +fn tagged_alt_with_type_annotation() { + let input = indoc! 
{r#" + Q = [ + Str: (string) @s :: string + Num: (number) @n + ] + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r#" + export interface Node { + kind: string; + text: string; + } + + export interface QStr { + $tag: "Str"; + $data: { s: string }; + } + + export interface QNum { + $tag: "Num"; + $data: { n: Node }; + } + + export type Q = QStr | QNum; + "#); +} + +#[test] +fn tagged_alt_captured() { + let input = indoc! {r#" + Q = [ + Str: (string) @s + Num: (number) @n + ] @result + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r#" + export interface Node { + kind: string; + text: string; + } + + export interface QResultStr { + $tag: "Str"; + $data: { s: Node }; + } + + export interface QResultNum { + $tag: "Num"; + $data: { n: Node }; + } + + export type QResult = QResultStr | QResultNum; + + export interface Q { + result: QResult; + } + "#); +} + +#[test] +fn nested_captured_group() { + let input = indoc! {r#" + Q = { + (identifier) @name + {(key) @k (value) @v} @pair + } + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface QPair { + k: Node; + v: Node; + } + + export interface Q { + name: Node; + pair: QPair; + } + "); +} + +#[test] +fn error_star_with_internal_captures_no_row() { + let input = indoc! {r#" + Bad = {(a) @a (b) @b}* + "#}; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: quantifier `*` contains captures (`@a`, `@b`) but no row capture + | + 1 | Bad = {(a) @a (b) @b}* + | ^^^^^^^^^^^^^^^^ + | + help: wrap as `{...}* @rows` + "); +} + +#[test] +fn error_plus_with_internal_capture_no_row() { + let input = indoc! 
{r#" + Bad = {(c) @c}+ + "#}; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: quantifier `+` contains captures (`@c`) but no row capture + | + 1 | Bad = {(c) @c}+ + | ^^^^^^^^^ + | + help: wrap as `{...}* @rows` + "); +} + +#[test] +fn error_named_node_with_capture_quantified() { + let input = indoc! {r#" + Bad = (func (identifier) @name)* + "#}; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @r" + error: quantifier `*` contains captures (`@name`) but no row capture + | + 1 | Bad = (func (identifier) @name)* + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + help: wrap as `{...}* @rows` + "); +} + +#[test] +fn recursive_type_with_alternation() { + let input = indoc! {r#" + Expr = [ + Lit: (number) @value ::string + Binary: (binary_expression + left: (Expr) @left + right: (Expr) @right) + ] + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r#" + export interface ExprLit { + $tag: "Lit"; + $data: { value: string }; + } + + export interface ExprBinary { + $tag: "Binary"; + $data: { left: Expr; right: Expr }; + } + + export type Expr = ExprLit | ExprBinary; + "#); +} + +#[test] +fn recursive_type_optional_self_ref() { + let input = indoc! {r#" + NestedCall = (call_expression + function: [ + (identifier) @name + (NestedCall) @inner + ] + ) + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface NestedCall { + inner?: NestedCall; + name?: Node; + } + "); +} + +#[test] +fn recursive_type_in_quantified_context() { + let input = indoc! 
{r#" + Item = (item (Item)* @children) + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Item { + children: Item[]; + } + "); +} diff --git a/docs/type-system.md b/docs/type-system.md index 53b0ee01..b24f33a1 100644 --- a/docs/type-system.md +++ b/docs/type-system.md @@ -8,35 +8,9 @@ Plotnik prioritizes **predictability** and **structural clarity** over terseness Two principles guide the type system: -1. **Explicit structure**: Captures bubble up to the nearest scope boundary. To create nested output, you must explicitly capture a group (`{...} @name`). +1. **Flat structure**: Captures bubble up to the nearest scope boundary. -2. **Strict dimensionality**: Quantifiers (`*`, `+`) containing captures require an explicit row capture. This prevents parallel arrays where `a[i]` and `b[i]` lose their per-iteration association. - -### Why Strictness - -Permissive systems create surprises: - -``` -// Permissive: implicit parallel arrays -{ (key) @k (value) @v }* -→ { k: Node[], v: Node[] } // Are k[0] and v[0] related? Maybe... - -// Iteration 1: k="a", v="1" -// Iteration 2: k="b", v="2" -// Output: { k: ["a","b"], v: ["1","2"] } // Association lost in flat arrays -``` - -Plotnik's strict approach: - -``` -// Strict: explicit row structure -{ (key) @k (value) @v }* @pairs -→ { pairs: { k: Node, v: Node }[] } // Each pair is a distinct object - -// Output: { pairs: [{ k: "a", v: "1" }, { k: "b", v: "2" }] } -``` - -The explicit `@pairs` capture tells both the compiler and reader: "this is a list of structured rows." +2. **Strict dimensionality**: Quantifiers (`*`, `+`) containing captures require an explicit row capture. The alternative could be creating parallel arrays, but it's hard to maintain the per-iteration association for `a[i]` and `b[i]`. ### Why Transparent Scoping @@ -77,7 +51,7 @@ This is the core rule that prevents association loss. Strict dimensionality applies **transitively through definitions**. 
Since definitions are transparent (captures bubble up), quantifying a definition that contains captures is equivalent to quantifying those captures directly: ``` -// Definition with capture +// Definition with captures Item = (pair (key) @k (value) @v) // These are equivalent after expansion: @@ -127,6 +101,8 @@ For node patterns with internal captures, wrap explicitly: → { params: { param: Node, name: string }[] } ``` +The strict rule forces you to think about structure upfront. + ### Optional Bubbling The `?` quantifier does **not** add dimensionality—it produces at most one value, not a list. Therefore, optional groups without captures are allowed: @@ -141,32 +117,6 @@ The `?` quantifier does **not** add dimensionality—it produces at most one val This lets optional fragments contribute fields directly to the parent struct without forcing an extra wrapper object. -### Why This Matters - -Consider extracting methods from classes: - -``` -// What we want: list of method objects -(class_declaration - body: (class_body - { (method_definition - name: (property_identifier) @name - parameters: (formal_parameters) @params - ) @method - }* @methods)) -→ { methods: { method: Node, name: Node, params: Node }[] } - -// Without strict dimensionality, you might write: -(class_declaration - body: (class_body - (method_definition - name: (property_identifier) @name - parameters: (formal_parameters) @params)*)) -→ { name: Node[], params: Node[] } // Parallel arrays—which name goes with which params? -``` - -The strict rule forces you to think about structure upfront. - ## 2. Scope Model ### Universal Bubbling @@ -187,6 +137,8 @@ New data structures are created only when explicitly requested: 2. **Captured Alternations**: `[...] @name` → Union 3. **Tagged Alternations**: `[ L: ... ] @name` → Tagged Union +In case of using quantifiers with captures, compiler forces you to create scope boundaries. + ## 3. Data Shapes ### Structs @@ -207,7 +159,7 @@ Created by `{ ... 
} @name`: Created by `[ ... ]`: - **Tagged**: `[ L1: (a) @a L2: (b) @b ]` → `{ "$tag": "L1", "$data": { a: Node } }` -- **Untagged**: `[ (a) @a (b) @b ]` → `{ a?: Node, b?: Node }` (merged) +- **Untagged**: `[ (a) @a (b) @b ]` → `{ a?: Node, b?: Node }` (merged 1-level deep) ### Enum Variants @@ -231,10 +183,10 @@ Quantifiers determine whether a field is singular, optional, or an array: | Pattern | Output Type | Meaning | | --------- | ---------------- | ------------ | -| `(x) @a` | `a: T` | exactly one | -| `(x)? @a` | `a?: T` | zero or one | -| `(x)* @a` | `a: T[]` | zero or more | -| `(x)+ @a` | `a: [T, ...T[]]` | one or more | +| `(A) @a` | `a: T` | exactly one | +| `(A)? @a` | `a?: T` | zero or one | +| `(A)* @a` | `a: T[]` | zero or more | +| `(A)+ @a` | `a: [T, ...T[]]` | one or more | ### Row Cardinality @@ -287,20 +239,22 @@ Shallow unification across untagged branches: ] // ERROR: String vs Node ``` +The choice of shallow unification is intentional. For more precision, users should use tagged unions. + ### Array Captures in Alternations -When a quantified capture appears in some branches but not others, the result is `Array | null`: +When a quantified capture appears in some branches but not others, the missing branch emits an empty array: ``` [ (a)+ @x (b) -] // x: Node[] | null +] // x: Node[] ``` -The missing branch emits `null`, not an empty array. This distinction matters: `null` means "branch didn't match" vs `[]` meaning "matched zero times." +Untagged alternations are "I don't care which branch matched"—so distinguishing "branch didn't match" from "matched zero times" is irrelevant. The empty array is easier to consume downstream. 
-For type conflicts, use tagged alternations: +When types start to conflict, use tagged alternations: ``` [ @@ -321,9 +275,12 @@ For type conflicts, use tagged alternations: Top-level fields merge with optionality; nested mismatches are errors: ``` -// OK: top-level merge +// OK: top-level merge (scalars become optional) { x: Node, y: Node } ∪ { x: Node, z: String } → { x: Node, y?: Node, z?: String } +// OK: arrays emit [] when missing (not null) +{ items: Node[], x: Node } ∪ { x: Node } → { items: Node[], x: Node } + // OK: identical nested { data: { a: Node } } ∪ { data: { a: Node }, extra: Node } → { data: { a: Node }, extra?: Node }