From e2485e46659c2d4efd43be9626f42ce23dc662cb Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 13:14:09 -0300 Subject: [PATCH 01/18] feat: Type check --- crates/plotnik-lib/src/diagnostics/message.rs | 20 + crates/plotnik-lib/src/query/dump.rs | 4 + crates/plotnik-lib/src/query/mod.rs | 3 + crates/plotnik-lib/src/query/query.rs | 16 + crates/plotnik-lib/src/query/query_tests.rs | 12 + .../src/query/type_check/context.rs | 159 ++++++ .../src/query/type_check/emit_ts.rs | 379 +++++++++++++ .../plotnik-lib/src/query/type_check/infer.rs | 513 ++++++++++++++++++ .../plotnik-lib/src/query/type_check/mod.rs | 109 ++++ .../plotnik-lib/src/query/type_check/types.rs | 235 ++++++++ .../plotnik-lib/src/query/type_check/unify.rs | 246 +++++++++ .../plotnik-lib/src/query/type_check_tests.rs | 28 + docs/type-system.md | 87 +-- 13 files changed, 1746 insertions(+), 65 deletions(-) create mode 100644 crates/plotnik-lib/src/query/type_check/context.rs create mode 100644 crates/plotnik-lib/src/query/type_check/emit_ts.rs create mode 100644 crates/plotnik-lib/src/query/type_check/infer.rs create mode 100644 crates/plotnik-lib/src/query/type_check/mod.rs create mode 100644 crates/plotnik-lib/src/query/type_check/types.rs create mode 100644 crates/plotnik-lib/src/query/type_check/unify.rs create mode 100644 crates/plotnik-lib/src/query/type_check_tests.rs diff --git a/crates/plotnik-lib/src/diagnostics/message.rs b/crates/plotnik-lib/src/diagnostics/message.rs index d53e34e2..8c0a2956 100644 --- a/crates/plotnik-lib/src/diagnostics/message.rs +++ b/crates/plotnik-lib/src/diagnostics/message.rs @@ -67,6 +67,10 @@ pub enum DiagnosticKind { IncompatibleTypes, MultiCaptureQuantifierNoName, UnusedBranchLabels, + StrictDimensionalityViolation, + DuplicateCaptureInScope, + IncompatibleCaptureTypes, + IncompatibleStructShapes, // Link pass - grammar validation UnknownNodeType, @@ -182,6 +186,10 @@ impl DiagnosticKind { "quantified expression with multiple captures requires 
`@name`" } Self::UnusedBranchLabels => "branch labels have no effect without capture", + Self::StrictDimensionalityViolation => "quantifier requires row capture", + Self::DuplicateCaptureInScope => "duplicate capture in scope", + Self::IncompatibleCaptureTypes => "incompatible capture types", + Self::IncompatibleStructShapes => "incompatible struct shapes", // Link pass - grammar validation Self::UnknownNodeType => "unknown node type", @@ -211,6 +219,18 @@ impl DiagnosticKind { Self::UndefinedReference => "`{}` is not defined".to_string(), Self::IncompatibleTypes => "incompatible types: {}".to_string(), + // Type inference errors with context + Self::StrictDimensionalityViolation => "{}".to_string(), + Self::DuplicateCaptureInScope => { + "capture `@{}` already defined in this scope".to_string() + } + Self::IncompatibleCaptureTypes => { + "capture `@{}` has incompatible types across branches".to_string() + } + Self::IncompatibleStructShapes => { + "capture `@{}` has incompatible struct fields across branches".to_string() + } + // Link pass errors with context Self::UnknownNodeType => "`{}` is not a valid node type".to_string(), Self::UnknownField => "`{}` is not a valid field".to_string(), diff --git a/crates/plotnik-lib/src/query/dump.rs b/crates/plotnik-lib/src/query/dump.rs index b8f28dd8..a7f02dca 100644 --- a/crates/plotnik-lib/src/query/dump.rs +++ b/crates/plotnik-lib/src/query/dump.rs @@ -36,5 +36,9 @@ mod test_helpers { pub fn dump_diagnostics_raw(&self) -> String { self.diagnostics().render(self.source_map()) } + + pub fn emit_typescript(&self) -> String { + crate::query::type_check::emit_typescript(self.type_context()) + } } } diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 78d4944e..55778847 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -15,6 +15,7 @@ pub mod link; #[allow(clippy::module_inception)] pub mod query; pub mod symbol_table; +pub mod type_check; pub 
mod visitor; #[cfg(test)] @@ -31,3 +32,5 @@ mod printer_tests; mod query_tests; #[cfg(test)] mod symbol_table_tests; +#[cfg(test)] +mod type_check_tests; diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 30da1c61..c3969f2c 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -15,6 +15,7 @@ use crate::query::expr_arity::{ExprArity, ExprArityTable, infer_arities, resolve use crate::query::link; use crate::query::source_map::{SourceId, SourceMap}; use crate::query::symbol_table::{SymbolTable, resolve_names}; +use crate::query::type_check::{self, Arity, TypeContext}; const DEFAULT_QUERY_PARSE_FUEL: u32 = 1_000_000; const DEFAULT_QUERY_PARSE_MAX_DEPTH: u32 = 4096; @@ -115,12 +116,22 @@ impl QueryParsed { &mut self.diag, ); + // Legacy arity table (to be removed once type_check is fully integrated) let arity_table = infer_arities(&self.ast_map, &symbol_table, &mut self.diag); + // New unified type checking pass + let type_context = type_check::infer_types( + &self.ast_map, + &symbol_table, + &dependency_analysis, + &mut self.diag, + ); + QueryAnalyzed { query_parsed: self, symbol_table, arity_table, + type_context, } } @@ -143,6 +154,7 @@ pub struct QueryAnalyzed { query_parsed: QueryParsed, pub symbol_table: SymbolTable, arity_table: ExprArityTable, + type_context: TypeContext, } impl QueryAnalyzed { @@ -154,6 +166,10 @@ impl QueryAnalyzed { resolve_arity(node, &self.arity_table) } + pub fn type_context(&self) -> &TypeContext { + &self.type_context + } + pub fn link(mut self, lang: &Lang) -> LinkedQuery { // Use reference-based hash maps during processing let mut type_ids: HashMap<&str, Option> = HashMap::new(); diff --git a/crates/plotnik-lib/src/query/query_tests.rs b/crates/plotnik-lib/src/query/query_tests.rs index 9aa44c11..4d3f3458 100644 --- a/crates/plotnik-lib/src/query/query_tests.rs +++ b/crates/plotnik-lib/src/query/query_tests.rs @@ -76,6 +76,18 @@ impl QueryAnalyzed 
{ query.dump_diagnostics() } + #[track_caller] + pub fn expect_valid_types(src: &str) -> String { + let query = Self::parse_and_validate(src); + if !query.is_valid() { + panic!( + "Expected valid types, got error:\n{}", + query.dump_diagnostics() + ); + } + query.emit_typescript() + } + #[track_caller] pub fn expect_invalid(src: &str) -> String { let source_map = SourceMap::one_liner(src); diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs new file mode 100644 index 00000000..917c65f3 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -0,0 +1,159 @@ +//! TypeContext: manages interned types and term info cache. +//! +//! Types are interned to enable cheap equality checks and cycle handling. +//! TermInfo is cached per-expression to avoid recomputation. + +use std::collections::HashMap; + +use crate::parser::ast::Expr; + +use super::types::{Arity, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind}; + +/// Central registry for types and expression metadata. 
+#[derive(Debug, Clone)] +pub struct TypeContext { + /// Interned types by ID + types: Vec, + /// Deduplication map for type interning + type_map: HashMap, + /// Cached term info per expression + term_info: HashMap, + /// Definition-level type info (for TypeScript emission) + def_types: HashMap, +} + +impl Default for TypeContext { + fn default() -> Self { + Self::new() + } +} + +impl TypeContext { + pub fn new() -> Self { + let mut ctx = Self { + types: Vec::new(), + type_map: HashMap::new(), + term_info: HashMap::new(), + def_types: HashMap::new(), + }; + + // Pre-register builtin types at their expected IDs + let void_id = ctx.intern_type(TypeKind::Void); + debug_assert_eq!(void_id, TYPE_VOID); + + let node_id = ctx.intern_type(TypeKind::Node); + debug_assert_eq!(node_id, TYPE_NODE); + + let string_id = ctx.intern_type(TypeKind::String); + debug_assert_eq!(string_id, TYPE_STRING); + + ctx + } + + /// Intern a type, returning its ID. Deduplicates identical types. + pub fn intern_type(&mut self, kind: TypeKind) -> TypeId { + if let Some(&id) = self.type_map.get(&kind) { + return id; + } + + let id = TypeId(self.types.len() as u32); + self.types.push(kind.clone()); + self.type_map.insert(kind, id); + id + } + + /// Get the TypeKind for a TypeId. + pub fn get_type(&self, id: TypeId) -> Option<&TypeKind> { + self.types.get(id.0 as usize) + } + + /// Get or create a type, returning both the ID and a reference. + pub fn get_or_intern(&mut self, kind: TypeKind) -> (TypeId, &TypeKind) { + let id = self.intern_type(kind); + (id, &self.types[id.0 as usize]) + } + + /// Cache term info for an expression. + pub fn set_term_info(&mut self, expr: Expr, info: TermInfo) { + self.term_info.insert(expr, info); + } + + /// Get cached term info for an expression. + pub fn get_term_info(&self, expr: &Expr) -> Option<&TermInfo> { + self.term_info.get(expr) + } + + /// Register the output type for a definition. 
+ pub fn set_def_type(&mut self, name: String, type_id: TypeId) { + self.def_types.insert(name, type_id); + } + + /// Get the output type for a definition. + pub fn get_def_type(&self, name: &str) -> Option { + self.def_types.get(name).copied() + } + + /// Get arity for an expression (for backward compatibility with expr_arity). + pub fn get_arity(&self, expr: &Expr) -> Option { + self.term_info.get(expr).map(|info| info.arity) + } + + /// Iterate over all interned types. + pub fn iter_types(&self) -> impl Iterator { + self.types + .iter() + .enumerate() + .map(|(i, k)| (TypeId(i as u32), k)) + } + + /// Number of interned types. + pub fn type_count(&self) -> usize { + self.types.len() + } + + /// Iterate over all definition types. + pub fn iter_def_types(&self) -> impl Iterator { + self.def_types.iter().map(|(k, v)| (k.as_str(), *v)) + } +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use super::*; + use crate::query::type_check::types::FieldInfo; + + #[test] + fn builtin_types_have_correct_ids() { + let ctx = TypeContext::new(); + + assert_eq!(ctx.get_type(TYPE_VOID), Some(&TypeKind::Void)); + assert_eq!(ctx.get_type(TYPE_NODE), Some(&TypeKind::Node)); + assert_eq!(ctx.get_type(TYPE_STRING), Some(&TypeKind::String)); + } + + #[test] + fn type_interning_deduplicates() { + let mut ctx = TypeContext::new(); + + let id1 = ctx.intern_type(TypeKind::Node); + let id2 = ctx.intern_type(TypeKind::Node); + + assert_eq!(id1, id2); + assert_eq!(id1, TYPE_NODE); + } + + #[test] + fn struct_types_intern_correctly() { + let mut ctx = TypeContext::new(); + + let mut fields = BTreeMap::new(); + fields.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + + let id1 = ctx.intern_type(TypeKind::Struct(fields.clone())); + let id2 = ctx.intern_type(TypeKind::Struct(fields)); + + assert_eq!(id1, id2); + } +} diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs new file mode 100644 index 
00000000..b1359407 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -0,0 +1,379 @@ +//! TypeScript type emitter for testing type inference. +//! +//! Converts inferred types to TypeScript declarations. +//! Used as a test oracle to verify type inference correctness. + +use std::collections::{BTreeMap, BTreeSet, HashMap}; + +use super::context::TypeContext; +use super::types::{FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeId, TypeKind}; + +/// Configuration for TypeScript emission. +#[derive(Clone, Debug)] +pub struct EmitConfig { + /// Whether to export types + pub export: bool, + /// Whether to emit the Node type definition + pub emit_node_type: bool, + /// Name for the root type if unnamed + pub root_type_name: String, + /// Use verbose node representation (with kind, text, etc.) + pub verbose_nodes: bool, +} + +impl Default for EmitConfig { + fn default() -> Self { + Self { + export: true, + emit_node_type: true, + root_type_name: "Query".to_string(), + verbose_nodes: false, + } + } +} + +/// TypeScript emitter. +pub struct TsEmitter<'a> { + ctx: &'a TypeContext, + config: EmitConfig, + /// Generated type names, to avoid collisions + used_names: BTreeSet, + /// TypeId -> generated name mapping + type_names: HashMap, + /// Output buffer + output: String, +} + +impl<'a> TsEmitter<'a> { + pub fn new(ctx: &'a TypeContext, config: EmitConfig) -> Self { + Self { + ctx, + config, + used_names: BTreeSet::new(), + type_names: HashMap::new(), + output: String::new(), + } + } + + /// Emit TypeScript for all definition types. + pub fn emit(mut self) -> String { + // First pass: collect all types that need names + self.collect_type_names(); + + // Emit Node type if configured + if self.config.emit_node_type { + self.emit_node_type(); + } + + // Emit each definition type + for (name, type_id) in self.ctx.iter_def_types() { + self.emit_definition(name, type_id); + } + + self.output + } + + /// Emit TypeScript for a single definition. 
+ pub fn emit_single(mut self, name: &str, type_id: TypeId) -> String { + self.collect_type_names(); + + if self.config.emit_node_type { + self.emit_node_type(); + } + + self.emit_definition(name, type_id); + self.output + } + + fn collect_type_names(&mut self) { + // Reserve definition names first + for (name, _) in self.ctx.iter_def_types() { + let pascal_name = to_pascal_case(name); + self.used_names.insert(pascal_name); + } + + // Then assign names to anonymous types + for (id, kind) in self.ctx.iter_types() { + if self.needs_named_type(kind) && !self.type_names.contains_key(&id) { + let name = self.generate_type_name(kind); + self.type_names.insert(id, name); + } + } + } + + fn needs_named_type(&self, kind: &TypeKind) -> bool { + matches!(kind, TypeKind::Struct(_) | TypeKind::Enum(_)) + } + + fn generate_type_name(&mut self, kind: &TypeKind) -> String { + let base = match kind { + TypeKind::Struct(_) => "Struct", + TypeKind::Enum(_) => "Enum", + _ => "Type", + }; + + self.unique_name(base) + } + + fn unique_name(&mut self, base: &str) -> String { + let base = to_pascal_case(base); + if !self.used_names.contains(&base) { + self.used_names.insert(base.clone()); + return base; + } + + let mut counter = 2; + loop { + let name = format!("{}{}", base, counter); + if !self.used_names.contains(&name) { + self.used_names.insert(name.clone()); + return name; + } + counter += 1; + } + } + + fn emit_node_type(&mut self) { + let export = if self.config.export { "export " } else { "" }; + + if self.config.verbose_nodes { + self.output.push_str(&format!( + "{}interface Node {{\n kind: string;\n text: string;\n startPosition: {{ row: number; column: number }};\n endPosition: {{ row: number; column: number }};\n}}\n\n", + export + )); + } else { + self.output.push_str(&format!( + "{}interface Node {{\n kind: string;\n text: string;\n}}\n\n", + export + )); + } + } + + fn emit_definition(&mut self, name: &str, type_id: TypeId) { + let export = if self.config.export { "export " 
} else { "" }; + let type_name = to_pascal_case(name); + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Struct(fields) => { + self.emit_interface(&type_name, fields, export); + } + TypeKind::Enum(variants) => { + self.emit_tagged_union(&type_name, variants, export); + } + _ => { + // For non-struct types, emit a type alias + let ts_type = self.type_to_ts(type_id); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type)); + } + } + } + + fn emit_interface(&mut self, name: &str, fields: &BTreeMap, export: &str) { + self.output + .push_str(&format!("{}interface {} {{\n", export, name)); + + for (field_name, info) in fields { + let ts_type = self.type_to_ts(info.type_id); + let optional = if info.optional { "?" } else { "" }; + self.output + .push_str(&format!(" {}{}: {};\n", field_name, optional, ts_type)); + } + + self.output.push_str("}\n\n"); + + // Emit nested types + for (_, info) in fields { + self.maybe_emit_nested_type(info.type_id); + } + } + + fn emit_tagged_union(&mut self, name: &str, variants: &BTreeMap, export: &str) { + // Emit variant types first + let mut variant_types = Vec::new(); + for (variant_name, type_id) in variants { + let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); + variant_types.push(variant_type_name.clone()); + + let data_type = self.type_to_ts(*type_id); + self.output.push_str(&format!( + "{}interface {} {{\n $tag: \"{}\";\n $data: {};\n}}\n\n", + export, variant_type_name, variant_name, data_type + )); + + self.maybe_emit_nested_type(*type_id); + } + + // Emit union type + let union = variant_types.join(" | "); + self.output + .push_str(&format!("{}type {} = {};\n\n", export, name, union)); + } + + fn maybe_emit_nested_type(&mut self, type_id: TypeId) { + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + // Skip if already emitted or is a primitive + if type_id.is_builtin() { + return; + } + + match kind { + 
TypeKind::Struct(fields) => { + if let Some(name) = self.type_names.get(&type_id) { + let name = name.clone(); + let export = if self.config.export { "export " } else { "" }; + self.emit_interface(&name, fields, export); + } + } + TypeKind::Array { element, .. } => { + self.maybe_emit_nested_type(*element); + } + TypeKind::Optional(inner) => { + self.maybe_emit_nested_type(*inner); + } + _ => {} + } + } + + fn type_to_ts(&self, type_id: TypeId) -> String { + if type_id == TYPE_VOID { + return "void".to_string(); + } + if type_id == TYPE_NODE { + return "Node".to_string(); + } + if type_id == TYPE_STRING { + return "string".to_string(); + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return "unknown".to_string(); + }; + + match kind { + TypeKind::Void => "void".to_string(), + TypeKind::Node => "Node".to_string(), + TypeKind::String => "string".to_string(), + TypeKind::Custom(name) => name.clone(), + TypeKind::Ref(name) => to_pascal_case(name), + + TypeKind::Struct(fields) => { + if let Some(name) = self.type_names.get(&type_id) { + name.clone() + } else { + self.inline_struct(fields) + } + } + + TypeKind::Enum(variants) => { + if let Some(name) = self.type_names.get(&type_id) { + name.clone() + } else { + self.inline_enum(variants) + } + } + + TypeKind::Array { element, non_empty } => { + let elem_type = self.type_to_ts(*element); + if *non_empty { + format!("[{}, ...{}[]]", elem_type, elem_type) + } else { + format!("{}[]", elem_type) + } + } + + TypeKind::Optional(inner) => { + let inner_type = self.type_to_ts(*inner); + format!("{} | null", inner_type) + } + } + } + + fn inline_struct(&self, fields: &BTreeMap) -> String { + if fields.is_empty() { + return "{}".to_string(); + } + + let field_strs: Vec<_> = fields + .iter() + .map(|(name, info)| { + let ts_type = self.type_to_ts(info.type_id); + let optional = if info.optional { "?" 
} else { "" }; + format!("{}{}: {}", name, optional, ts_type) + }) + .collect(); + + format!("{{ {} }}", field_strs.join("; ")) + } + + fn inline_enum(&self, variants: &BTreeMap) -> String { + let variant_strs: Vec<_> = variants + .iter() + .map(|(name, type_id)| { + let data_type = self.type_to_ts(*type_id); + format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type) + }) + .collect(); + + variant_strs.join(" | ") + } +} + +/// Convert a string to PascalCase. +fn to_pascal_case(s: &str) -> String { + let mut result = String::with_capacity(s.len()); + let mut capitalize_next = true; + + for c in s.chars() { + if c == '_' || c == '-' || c == '.' { + capitalize_next = true; + } else if capitalize_next { + result.extend(c.to_uppercase()); + capitalize_next = false; + } else { + result.push(c); + } + } + + result +} + +/// Convenience function to emit TypeScript from a TypeContext. +pub fn emit_typescript(ctx: &TypeContext) -> String { + TsEmitter::new(ctx, EmitConfig::default()).emit() +} + +/// Emit TypeScript with custom configuration. +pub fn emit_typescript_with_config(ctx: &TypeContext, config: EmitConfig) -> String { + TsEmitter::new(ctx, config).emit() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn to_pascal_case_works() { + assert_eq!(to_pascal_case("foo"), "Foo"); + assert_eq!(to_pascal_case("foo_bar"), "FooBar"); + assert_eq!(to_pascal_case("foo-bar"), "FooBar"); + assert_eq!(to_pascal_case("_"), ""); + assert_eq!(to_pascal_case("FooBar"), "FooBar"); + } + + #[test] + fn emit_node_type() { + let ctx = TypeContext::new(); + let output = TsEmitter::new(&ctx, EmitConfig::default()).emit(); + + assert!(output.contains("interface Node")); + assert!(output.contains("kind: string")); + } +} diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs new file mode 100644 index 00000000..437ad327 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -0,0 +1,513 @@ +//! 
Bottom-up type inference visitor. +//! +//! Traverses the AST and computes TermInfo (Arity + TypeFlow) for each expression. +//! Reports diagnostics for type errors like strict dimensionality violations. + +use std::collections::BTreeMap; + +use rowan::TextRange; + +use crate::diagnostics::{DiagnosticKind, Diagnostics}; +use crate::parser::ast::{ + AltExpr, AltKind, AnonymousNode, CapturedExpr, Def, Expr, FieldExpr, NamedNode, QuantifiedExpr, + Ref, Root, SeqExpr, +}; +use crate::parser::cst::SyntaxKind; +use crate::query::source_map::SourceId; +use crate::query::symbol_table::SymbolTable; +use crate::query::visitor::{Visitor, walk_alt_expr, walk_def, walk_named_node, walk_seq_expr}; + +use super::context::TypeContext; +use super::types::{ + Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TermInfo, TypeFlow, TypeId, TypeKind, +}; +use super::unify::{UnifyError, unify_flows}; + +/// Inference context for a single pass over the AST. +pub struct InferenceVisitor<'a, 'd> { + pub ctx: &'a mut TypeContext, + pub symbol_table: &'a SymbolTable, + pub diag: &'d mut Diagnostics, + pub source_id: SourceId, +} + +impl<'a, 'd> InferenceVisitor<'a, 'd> { + pub fn new( + ctx: &'a mut TypeContext, + symbol_table: &'a SymbolTable, + diag: &'d mut Diagnostics, + source_id: SourceId, + ) -> Self { + Self { + ctx, + symbol_table, + diag, + source_id, + } + } + + /// Infer the TermInfo for an expression, caching the result. 
+ pub fn infer_expr(&mut self, expr: &Expr) -> TermInfo { + // Check cache first + if let Some(info) = self.ctx.get_term_info(expr) { + return info.clone(); + } + + // Insert sentinel to break cycles + self.ctx.set_term_info(expr.clone(), TermInfo::void()); + + let info = self.compute_expr(expr); + self.ctx.set_term_info(expr.clone(), info.clone()); + info + } + + fn compute_expr(&mut self, expr: &Expr) -> TermInfo { + match expr { + Expr::NamedNode(n) => self.infer_named_node(n), + Expr::AnonymousNode(n) => self.infer_anonymous_node(n), + Expr::Ref(r) => self.infer_ref(r), + Expr::SeqExpr(s) => self.infer_seq_expr(s), + Expr::AltExpr(a) => self.infer_alt_expr(a), + Expr::CapturedExpr(c) => self.infer_captured_expr(c), + Expr::QuantifiedExpr(q) => self.infer_quantified_expr(q), + Expr::FieldExpr(f) => self.infer_field_expr(f), + } + } + + /// Named node: matches one position, produces nothing + fn infer_named_node(&mut self, node: &NamedNode) -> TermInfo { + // Recursively infer children first + for child in node.children() { + self.infer_expr(&child); + } + TermInfo::new(Arity::One, TypeFlow::Void) + } + + /// Anonymous node (literal or wildcard): matches one position, produces nothing + fn infer_anonymous_node(&mut self, _node: &AnonymousNode) -> TermInfo { + TermInfo::new(Arity::One, TypeFlow::Void) + } + + /// Reference: delegate arity, but refs are scope boundaries so produce Scalar(Ref) + fn infer_ref(&mut self, r: &Ref) -> TermInfo { + let Some(name_tok) = r.name() else { + return TermInfo::void(); + }; + let name = name_tok.text(); + + // Get the body expression for this definition + let Some(body) = self.symbol_table.get(name) else { + // Undefined ref - already reported by symbol_table pass + return TermInfo::void(); + }; + + // Infer the body to get its arity + let body_info = self.infer_expr(body); + + // Refs are scope boundaries - they produce a Scalar(Ref) regardless of what's inside + let ref_type = 
self.ctx.intern_type(TypeKind::Ref(name.to_string())); + TermInfo::new(body_info.arity, TypeFlow::Scalar(ref_type)) + } + + /// Sequence: One if 0-1 children, else Many; merge children's fields + fn infer_seq_expr(&mut self, seq: &SeqExpr) -> TermInfo { + let children: Vec<_> = seq.children().collect(); + + // Compute arity based on child count + let arity = match children.len() { + 0 | 1 => children + .first() + .map(|c| self.infer_expr(c).arity) + .unwrap_or(Arity::One), + _ => Arity::Many, + }; + + // Merge fields from all children + let mut merged_fields: BTreeMap = BTreeMap::new(); + + for child in &children { + let child_info = self.infer_expr(child); + + if let TypeFlow::Fields(fields) = child_info.flow { + for (name, info) in fields { + if merged_fields.contains_key(&name) { + // Duplicate capture in same scope - error + self.diag + .report( + self.source_id, + DiagnosticKind::DuplicateCaptureInScope, + child.text_range(), + ) + .message(&name) + .emit(); + } else { + merged_fields.insert(name, info); + } + } + } + // Void and Scalar children don't contribute fields + // (Scalar would be from refs, which are scope boundaries) + } + + let flow = if merged_fields.is_empty() { + TypeFlow::Void + } else { + TypeFlow::Fields(merged_fields) + }; + + TermInfo::new(arity, flow) + } + + /// Alternation: arity is Many if ANY branch is Many; type depends on tagged vs untagged + fn infer_alt_expr(&mut self, alt: &AltExpr) -> TermInfo { + let kind = alt.kind(); + + match kind { + AltKind::Tagged => self.infer_tagged_alt(alt), + AltKind::Untagged | AltKind::Mixed => self.infer_untagged_alt(alt), + } + } + + fn infer_tagged_alt(&mut self, alt: &AltExpr) -> TermInfo { + let mut variants: BTreeMap = BTreeMap::new(); + let mut combined_arity = Arity::One; + + for branch in alt.branches() { + let Some(label) = branch.label() else { + continue; + }; + let label_text = label.text().to_string(); + + let Some(body) = branch.body() else { + // Empty variant gets void/empty struct 
type + variants.insert( + label_text, + self.ctx.intern_type(TypeKind::Struct(BTreeMap::new())), + ); + continue; + }; + + let body_info = self.infer_expr(&body); + combined_arity = combined_arity.combine(body_info.arity); + + // Convert flow to a type for this variant + let variant_type = self.flow_to_type(&body_info.flow); + variants.insert(label_text, variant_type); + } + + // Tagged alternation produces an Enum type + let enum_type = self.ctx.intern_type(TypeKind::Enum(variants)); + TermInfo::new(combined_arity, TypeFlow::Scalar(enum_type)) + } + + fn infer_untagged_alt(&mut self, alt: &AltExpr) -> TermInfo { + let mut flows: Vec = Vec::new(); + let mut combined_arity = Arity::One; + + // Handle both direct exprs and branches without labels + for branch in alt.branches() { + if let Some(body) = branch.body() { + let body_info = self.infer_expr(&body); + combined_arity = combined_arity.combine(body_info.arity); + flows.push(body_info.flow); + } + } + + for expr in alt.exprs() { + let expr_info = self.infer_expr(&expr); + combined_arity = combined_arity.combine(expr_info.arity); + flows.push(expr_info.flow); + } + + // Unify all flows + let unified_flow = match unify_flows(flows) { + Ok(flow) => flow, + Err(err) => { + self.report_unify_error(alt.text_range(), &err); + TypeFlow::Void + } + }; + + TermInfo::new(combined_arity, unified_flow) + } + + /// Captured expression: wraps inner's flow into a field + fn infer_captured_expr(&mut self, cap: &CapturedExpr) -> TermInfo { + let Some(name_tok) = cap.name() else { + // Missing name - recover gracefully + return cap + .inner() + .map(|inner| self.infer_expr(&inner)) + .unwrap_or_else(TermInfo::void); + }; + let capture_name = name_tok.text().to_string(); + + // Check for type annotation + let annotation_type = cap.type_annotation().and_then(|t| { + t.name().map(|n| { + let type_name = n.text(); + if type_name == "string" { + TYPE_STRING + } else { + self.ctx + .intern_type(TypeKind::Custom(type_name.to_string())) + 
} + }) + }); + + let Some(inner) = cap.inner() else { + // Capture without inner - still produces a field + let type_id = annotation_type.unwrap_or(TYPE_NODE); + return TermInfo::new( + Arity::One, + TypeFlow::single_field(capture_name, FieldInfo::required(type_id)), + ); + }; + + let inner_info = self.infer_expr(&inner); + + // Transform based on inner's flow + let captured_type = match &inner_info.flow { + TypeFlow::Void => { + // @name on Void → capture produces Node (or annotated type) + annotation_type.unwrap_or(TYPE_NODE) + } + TypeFlow::Scalar(type_id) => { + // @name on Scalar → capture that scalar type + annotation_type.unwrap_or(*type_id) + } + TypeFlow::Fields(fields) => { + // @name on Fields → create Struct from fields, capture that + if let Some(annotated) = annotation_type { + annotated + } else { + self.ctx.intern_type(TypeKind::Struct(fields.clone())) + } + } + }; + + TermInfo::new( + inner_info.arity, + TypeFlow::single_field(capture_name, FieldInfo::required(captured_type)), + ) + } + + /// Quantified expression: applies quantifier to inner's flow + fn infer_quantified_expr(&mut self, quant: &QuantifiedExpr) -> TermInfo { + let Some(inner) = quant.inner() else { + return TermInfo::void(); + }; + + let inner_info = self.infer_expr(&inner); + let quantifier = self.parse_quantifier(quant); + + match quantifier { + QuantifierKind::Optional => { + // `?` makes fields optional, doesn't add dimensionality + let flow = match inner_info.flow { + TypeFlow::Void => TypeFlow::Void, + TypeFlow::Scalar(t) => { + TypeFlow::Scalar(self.ctx.intern_type(TypeKind::Optional(t))) + } + TypeFlow::Fields(fields) => { + // Make all fields optional + let optional_fields = fields + .into_iter() + .map(|(k, v)| (k, v.make_optional())) + .collect(); + TypeFlow::Fields(optional_fields) + } + }; + TermInfo::new(inner_info.arity, flow) + } + + QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => { + // * and + require strict dimensionality + 
self.check_strict_dimensionality(quant, &inner_info); + + let flow = match inner_info.flow { + TypeFlow::Void => TypeFlow::Void, + TypeFlow::Scalar(t) => { + // Scalar becomes array + let array_type = self.ctx.intern_type(TypeKind::Array { + element: t, + non_empty: quantifier.is_non_empty(), + }); + TypeFlow::Scalar(array_type) + } + TypeFlow::Fields(fields) => { + // Fields with * or + and no row capture is an error + // (already reported by check_strict_dimensionality) + // Return array of struct as best-effort + let struct_type = self.ctx.intern_type(TypeKind::Struct(fields)); + let array_type = self.ctx.intern_type(TypeKind::Array { + element: struct_type, + non_empty: quantifier.is_non_empty(), + }); + TypeFlow::Scalar(array_type) + } + }; + TermInfo::new(inner_info.arity, flow) + } + } + } + + /// Field expression: arity One, delegates type to value + fn infer_field_expr(&mut self, field: &FieldExpr) -> TermInfo { + let Some(value) = field.value() else { + return TermInfo::void(); + }; + + let value_info = self.infer_expr(&value); + + // Field validation: value must have arity One + if value_info.arity == Arity::Many { + let field_name = field + .name() + .map(|t| t.text().to_string()) + .unwrap_or_else(|| "field".to_string()); + + let mut builder = self.diag.report( + self.source_id, + DiagnosticKind::FieldSequenceValue, + value.text_range(), + ); + builder = builder.message(field_name); + + // If value is a reference, add related info + if let Expr::Ref(r) = &value + && let Some(name_tok) = r.name() + && let Some((def_source, def_body)) = self.symbol_table.get_full(name_tok.text()) + { + builder = builder.related_to(def_source, def_body.text_range(), "defined here"); + } + + builder.emit(); + } + + // Field itself has arity One; flow passes through + TermInfo::new(Arity::One, value_info.flow) + } + + /// Check strict dimensionality rule for * and + quantifiers. 
+ fn check_strict_dimensionality(&mut self, quant: &QuantifiedExpr, inner_info: &TermInfo) { + // If inner has fields (captures), that's a violation + if let TypeFlow::Fields(fields) = &inner_info.flow + && !fields.is_empty() + { + let op = quant + .operator() + .map(|t| t.text().to_string()) + .unwrap_or_else(|| "*".to_string()); + + let capture_names: Vec<_> = fields.keys().map(|s| format!("`@{}`", s)).collect(); + let captures_str = capture_names.join(", "); + + self.diag + .report( + self.source_id, + DiagnosticKind::StrictDimensionalityViolation, + quant.text_range(), + ) + .message(format!( + "quantifier `{}` contains captures ({}) but no row capture", + op, captures_str + )) + .hint("wrap as `{...}* @rows`") + .emit(); + } + } + + fn parse_quantifier(&self, quant: &QuantifiedExpr) -> QuantifierKind { + let Some(op) = quant.operator() else { + return QuantifierKind::ZeroOrMore; + }; + + match op.kind() { + SyntaxKind::Question | SyntaxKind::QuestionQuestion => QuantifierKind::Optional, + SyntaxKind::Star | SyntaxKind::StarQuestion => QuantifierKind::ZeroOrMore, + SyntaxKind::Plus | SyntaxKind::PlusQuestion => QuantifierKind::OneOrMore, + _ => QuantifierKind::ZeroOrMore, + } + } + + /// Convert a TypeFlow to a TypeId for storage in enum variants, etc. 
+ fn flow_to_type(&mut self, flow: &TypeFlow) -> TypeId { + match flow { + TypeFlow::Void => self.ctx.intern_type(TypeKind::Struct(BTreeMap::new())), + TypeFlow::Scalar(t) => *t, + TypeFlow::Fields(fields) => self.ctx.intern_type(TypeKind::Struct(fields.clone())), + } + } + + fn report_unify_error(&mut self, range: TextRange, err: &UnifyError) { + let (kind, msg) = match err { + UnifyError::ScalarInUntagged => ( + DiagnosticKind::IncompatibleTypes, + "scalar type in untagged alternation; use tagged alternation instead".to_string(), + ), + UnifyError::IncompatibleTypes { field } => { + (DiagnosticKind::IncompatibleCaptureTypes, field.clone()) + } + UnifyError::IncompatibleStructs { field } => { + (DiagnosticKind::IncompatibleStructShapes, field.clone()) + } + UnifyError::IncompatibleArrayElements { field } => { + (DiagnosticKind::IncompatibleCaptureTypes, field.clone()) + } + }; + + self.diag + .report(self.source_id, kind, range) + .message(msg) + .emit(); + } +} + +impl Visitor for InferenceVisitor<'_, '_> { + fn visit_def(&mut self, def: &Def) { + walk_def(self, def); + } + + fn visit_expr(&mut self, expr: &Expr) { + self.infer_expr(expr); + } + + fn visit_named_node(&mut self, node: &NamedNode) { + // Visit children first (bottom-up) + walk_named_node(self, node); + } + + fn visit_seq_expr(&mut self, seq: &SeqExpr) { + walk_seq_expr(self, seq); + } + + fn visit_alt_expr(&mut self, alt: &AltExpr) { + walk_alt_expr(self, alt); + } +} + +/// Run inference on a single definition. +pub fn infer_definition( + ctx: &mut TypeContext, + symbol_table: &SymbolTable, + diag: &mut Diagnostics, + source_id: SourceId, + def_name: &str, +) -> Option { + let body = symbol_table.get(def_name)?; + let mut visitor = InferenceVisitor::new(ctx, symbol_table, diag, source_id); + Some(visitor.infer_expr(body)) +} + +/// Run inference on all definitions in a root. 
+pub fn infer_root( + ctx: &mut TypeContext, + symbol_table: &SymbolTable, + diag: &mut Diagnostics, + source_id: SourceId, + root: &Root, +) { + let mut visitor = InferenceVisitor::new(ctx, symbol_table, diag, source_id); + visitor.visit(root); +} diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs new file mode 100644 index 00000000..85d5bcaa --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -0,0 +1,109 @@ +//! Unified type checking pass. +//! +//! Computes both structural arity (for field validation) and data flow types +//! (for TypeScript emission) in a single traversal. +//! +//! Replaces the previous `expr_arity.rs` with a more comprehensive type system. + +mod context; +mod emit_ts; +mod infer; +mod types; +mod unify; + +pub use context::TypeContext; +pub use emit_ts::{EmitConfig, TsEmitter, emit_typescript, emit_typescript_with_config}; +pub use types::{ + Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeFlow, + TypeId, TypeKind, +}; +pub use unify::{UnifyError, unify_flow, unify_flows}; + +use indexmap::IndexMap; + +use crate::diagnostics::Diagnostics; +use crate::parser::ast::Root; +use crate::query::dependencies::DependencyAnalysis; +use crate::query::source_map::SourceId; +use crate::query::symbol_table::{SymbolTable, UNNAMED_DEF}; + +use infer::infer_root; + +/// Run type inference on all definitions. +/// +/// Processes definitions in dependency order (leaves first) to handle +/// recursive definitions correctly. 
+pub fn infer_types( + ast_map: &IndexMap, + symbol_table: &SymbolTable, + dependency_analysis: &DependencyAnalysis, + diag: &mut Diagnostics, +) -> TypeContext { + let mut ctx = TypeContext::new(); + + // Process definitions in SCC order (leaves first) + for scc in &dependency_analysis.sccs { + for def_name in scc { + // Get the source ID for this definition + let Some(source_id) = symbol_table.source_id(def_name) else { + continue; + }; + + let Some(root) = ast_map.get(&source_id) else { + continue; + }; + + // Run inference on this root + infer_root(&mut ctx, symbol_table, diag, source_id, root); + + // Register the definition's output type + if let Some(body) = symbol_table.get(def_name) { + if let Some(info) = ctx.get_term_info(body).cloned() { + let type_id = flow_to_type_id(&mut ctx, &info.flow); + ctx.set_def_type(def_name.to_string(), type_id); + } + } + } + } + + // Handle any definitions not in an SCC (shouldn't happen, but be safe) + for (name, source_id, _body) in symbol_table.iter_full() { + if ctx.get_def_type(name).is_some() { + continue; + } + + let Some(root) = ast_map.get(&source_id) else { + continue; + }; + + infer_root(&mut ctx, symbol_table, diag, source_id, root); + + if let Some(body) = symbol_table.get(name) { + if let Some(info) = ctx.get_term_info(body).cloned() { + let type_id = flow_to_type_id(&mut ctx, &info.flow); + ctx.set_def_type(name.to_string(), type_id); + } + } + } + + ctx +} + +/// Convert a TypeFlow to a TypeId for storage. +fn flow_to_type_id(ctx: &mut TypeContext, flow: &TypeFlow) -> TypeId { + match flow { + TypeFlow::Void => ctx.intern_type(TypeKind::Struct(std::collections::BTreeMap::new())), + TypeFlow::Scalar(type_id) => *type_id, + TypeFlow::Fields(fields) => ctx.intern_type(TypeKind::Struct(fields.clone())), + } +} + +/// Get the primary definition name (first non-underscore, or underscore if none). 
+pub fn primary_def_name(symbol_table: &SymbolTable) -> &str { + for name in symbol_table.keys() { + if name != UNNAMED_DEF { + return name; + } + } + UNNAMED_DEF +} diff --git a/crates/plotnik-lib/src/query/type_check/types.rs b/crates/plotnik-lib/src/query/type_check/types.rs new file mode 100644 index 00000000..783c79b4 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/types.rs @@ -0,0 +1,235 @@ +//! Core type definitions for the type checking pass. +//! +//! The type system tracks two orthogonal properties: +//! - Arity: Whether an expression matches one or many node positions (for field validation) +//! - TypeFlow: What data flows through an expression (for TypeScript emission) + +use std::collections::BTreeMap; + +/// Interned type identifier. Types are stored in TypeContext and referenced by ID. +#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct TypeId(pub u32); + +/// Void type - produces nothing, transparent +pub const TYPE_VOID: TypeId = TypeId(0); +/// Node type - a tree-sitter node +pub const TYPE_NODE: TypeId = TypeId(1); +/// String type - extracted text from a node via `:: string` +pub const TYPE_STRING: TypeId = TypeId(2); + +impl TypeId { + pub fn is_builtin(self) -> bool { + self.0 <= 2 + } +} + +/// The kind of a type, determining its structure. 
+#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub enum TypeKind { + /// Produces nothing, transparent to parent scope + Void, + /// A tree-sitter node + Node, + /// Extracted text from a node + String, + /// User-specified type via `@x :: TypeName` + Custom(String), + /// Object with named fields + Struct(BTreeMap), + /// Tagged union from labeled alternations + Enum(BTreeMap), + /// Array type with element type + Array { element: TypeId, non_empty: bool }, + /// Optional wrapper + Optional(TypeId), + /// Forward reference to a recursive type + Ref(String), +} + +impl TypeKind { + pub fn is_void(&self) -> bool { + matches!(self, TypeKind::Void) + } + + pub fn is_scalar(&self) -> bool { + matches!( + self, + TypeKind::Node + | TypeKind::String + | TypeKind::Custom(_) + | TypeKind::Struct(_) + | TypeKind::Enum(_) + | TypeKind::Array { .. } + | TypeKind::Optional(_) + | TypeKind::Ref(_) + ) + } +} + +/// Field information within a struct type. +#[derive(Clone, PartialEq, Eq, Hash, Debug)] +pub struct FieldInfo { + pub type_id: TypeId, + pub optional: bool, +} + +impl FieldInfo { + pub fn required(type_id: TypeId) -> Self { + Self { + type_id, + optional: false, + } + } + + pub fn optional(type_id: TypeId) -> Self { + Self { + type_id, + optional: true, + } + } + + pub fn make_optional(self) -> Self { + Self { + optional: true, + ..self + } + } +} + +/// Combined arity and type flow information for an expression. 
+#[derive(Clone, Debug)] +pub struct TermInfo { + pub arity: Arity, + pub flow: TypeFlow, +} + +impl TermInfo { + pub fn new(arity: Arity, flow: TypeFlow) -> Self { + Self { arity, flow } + } + + pub fn void() -> Self { + Self { + arity: Arity::One, + flow: TypeFlow::Void, + } + } + + pub fn node() -> Self { + Self { + arity: Arity::One, + flow: TypeFlow::Void, + } + } + + pub fn scalar(arity: Arity, type_id: TypeId) -> Self { + Self { + arity, + flow: TypeFlow::Scalar(type_id), + } + } + + pub fn fields(arity: Arity, fields: BTreeMap) -> Self { + Self { + arity, + flow: TypeFlow::Fields(fields), + } + } +} + +/// Structural arity - whether an expression matches one or many positions. +/// +/// Used for field validation: `field: expr` requires `expr` to have `Arity::One`. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Arity { + /// Exactly one node position + One, + /// Multiple sequential positions + Many, +} + +impl Arity { + /// Combine arities: Many wins + pub fn combine(self, other: Arity) -> Arity { + match (self, other) { + (Arity::One, Arity::One) => Arity::One, + _ => Arity::Many, + } + } +} + +/// Data flow through an expression. 
+/// +/// Determines what data an expression contributes to output: +/// - Void: Transparent, produces nothing (used for structural matching) +/// - Scalar: Opaque single value (captures, refs create scope boundaries) +/// - Fields: Transparent field contributions that bubble to parent +#[derive(Clone, Debug)] +pub enum TypeFlow { + /// Transparent, produces nothing + Void, + /// Opaque single value that doesn't bubble + Scalar(TypeId), + /// Transparent fields that bubble to parent scope + Fields(BTreeMap), +} + +impl TypeFlow { + pub fn is_void(&self) -> bool { + matches!(self, TypeFlow::Void) + } + + pub fn is_scalar(&self) -> bool { + matches!(self, TypeFlow::Scalar(_)) + } + + pub fn is_fields(&self) -> bool { + matches!(self, TypeFlow::Fields(_)) + } + + pub fn has_captures(&self) -> bool { + match self { + TypeFlow::Void => false, + TypeFlow::Scalar(_) => false, + TypeFlow::Fields(f) => !f.is_empty(), + } + } + + /// Get field names if this is a Fields flow + pub fn field_names(&self) -> Option> { + match self { + TypeFlow::Fields(f) => Some(f.keys().map(|s| s.as_str()).collect()), + _ => None, + } + } + + /// Create a single-field flow + pub fn single_field(name: String, info: FieldInfo) -> Self { + let mut fields = BTreeMap::new(); + fields.insert(name, info); + TypeFlow::Fields(fields) + } +} + +/// Quantifier kind for type inference +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum QuantifierKind { + /// `?` or `??` - zero or one, no dimensionality added + Optional, + /// `*` or `*?` - zero or more, adds dimensionality + ZeroOrMore, + /// `+` or `+?` - one or more, adds dimensionality + OneOrMore, +} + +impl QuantifierKind { + /// Whether this quantifier requires strict dimensionality (row capture for internal captures) + pub fn requires_row_capture(self) -> bool { + matches!(self, QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore) + } + + /// Whether the resulting array is non-empty + pub fn is_non_empty(self) -> bool { + matches!(self, 
QuantifierKind::OneOrMore) + } +} diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs new file mode 100644 index 00000000..08382a61 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -0,0 +1,246 @@ +//! Unification logic for alternation branches. +//! +//! Handles merging TypeFlow from different branches of untagged alternations. +//! Tagged alternations don't unify—they produce Enum types directly. + +use std::collections::BTreeMap; + +use super::types::{FieldInfo, TYPE_NODE, TypeFlow, TypeId, TypeKind}; + +/// Error during type unification. +#[derive(Clone, Debug)] +pub enum UnifyError { + /// Scalar type appeared in untagged alternation (needs tagging) + ScalarInUntagged, + /// Capture has incompatible types across branches + IncompatibleTypes { field: String }, + /// Capture has incompatible struct shapes across branches + IncompatibleStructs { field: String }, + /// Array element types don't match + IncompatibleArrayElements { field: String }, +} + +impl UnifyError { + pub fn field_name(&self) -> Option<&str> { + match self { + UnifyError::ScalarInUntagged => None, + UnifyError::IncompatibleTypes { field } + | UnifyError::IncompatibleStructs { field } + | UnifyError::IncompatibleArrayElements { field } => Some(field), + } + } +} + +/// Unify two TypeFlows from alternation branches. 
+/// +/// Rules: +/// - Void ∪ Void → Void +/// - Void ∪ Fields(f) → Fields(make_all_optional(f)) +/// - Fields(a) ∪ Fields(b) → Fields(merge_fields(a, b)) +/// - Scalar in untagged → Error (use tagged alternation instead) +pub fn unify_flow(a: TypeFlow, b: TypeFlow) -> Result { + match (a, b) { + (TypeFlow::Void, TypeFlow::Void) => Ok(TypeFlow::Void), + + (TypeFlow::Void, TypeFlow::Fields(f)) | (TypeFlow::Fields(f), TypeFlow::Void) => { + Ok(TypeFlow::Fields(make_all_optional(f))) + } + + (TypeFlow::Fields(a), TypeFlow::Fields(b)) => Ok(TypeFlow::Fields(merge_fields(a, b)?)), + + // Scalars can't appear in untagged alternations + (TypeFlow::Scalar(_), _) | (_, TypeFlow::Scalar(_)) => Err(UnifyError::ScalarInUntagged), + } +} + +/// Unify multiple flows from alternation branches. +pub fn unify_flows(flows: impl IntoIterator) -> Result { + let mut iter = flows.into_iter(); + let Some(first) = iter.next() else { + return Ok(TypeFlow::Void); + }; + + iter.try_fold(first, unify_flow) +} + +/// Make all fields in a map optional. +fn make_all_optional(fields: BTreeMap) -> BTreeMap { + fields + .into_iter() + .map(|(k, v)| (k, v.make_optional())) + .collect() +} + +/// Merge two field maps. 
+/// +/// Rules: +/// - Keys in both: types must be compatible, field is required iff required in both +/// - Keys in only one: field becomes optional +fn merge_fields( + a: BTreeMap, + b: BTreeMap, +) -> Result, UnifyError> { + let mut result = BTreeMap::new(); + + // Process all keys from a + for (key, a_info) in &a { + if let Some(b_info) = b.get(key) { + // Key exists in both: unify types + let unified_type = unify_type_ids(a_info.type_id, b_info.type_id, key)?; + let optional = a_info.optional || b_info.optional; + result.insert( + key.clone(), + FieldInfo { + type_id: unified_type, + optional, + }, + ); + } else { + // Key only in a: make optional + result.insert(key.clone(), a_info.clone().make_optional()); + } + } + + // Process keys only in b + for (key, b_info) in b { + if !a.contains_key(&key) { + result.insert(key, b_info.make_optional()); + } + } + + Ok(result) +} + +/// Unify two type IDs. +/// +/// For now, types must match exactly (except Node is compatible with Node). +/// Future: could allow structural subtyping for structs. +fn unify_type_ids(a: TypeId, b: TypeId, field: &str) -> Result { + if a == b { + return Ok(a); + } + + // Both are Node type - compatible + if a == TYPE_NODE && b == TYPE_NODE { + return Ok(TYPE_NODE); + } + + // Type mismatch + Err(UnifyError::IncompatibleTypes { + field: field.to_string(), + }) +} + +/// Check if two TypeKinds are structurally compatible. +/// +/// Used for deeper compatibility checking when TypeIds differ +/// but the underlying structures might be equivalent. 
+pub fn types_compatible(a: &TypeKind, b: &TypeKind) -> bool { + match (a, b) { + (TypeKind::Void, TypeKind::Void) => true, + (TypeKind::Node, TypeKind::Node) => true, + (TypeKind::String, TypeKind::String) => true, + (TypeKind::Custom(a), TypeKind::Custom(b)) => a == b, + (TypeKind::Ref(a), TypeKind::Ref(b)) => a == b, + + (TypeKind::Optional(a), TypeKind::Optional(b)) => a == b, + + ( + TypeKind::Array { + element: a_elem, + non_empty: a_ne, + }, + TypeKind::Array { + element: b_elem, + non_empty: b_ne, + }, + ) => { + // Elements must match; looser cardinality wins (non_empty false wins) + a_elem == b_elem && (!a_ne || !b_ne || a_ne == b_ne) + } + + (TypeKind::Struct(a), TypeKind::Struct(b)) => { + // Must have identical field sets + if a.len() != b.len() { + return false; + } + a.iter().all(|(k, a_info)| { + b.get(k) + .map(|b_info| a_info.type_id == b_info.type_id) + .unwrap_or(false) + }) + } + + (TypeKind::Enum(a), TypeKind::Enum(b)) => { + // Must have identical variant sets + if a.len() != b.len() { + return false; + } + a.iter() + .all(|(k, a_ty)| b.get(k).map(|b_ty| a_ty == b_ty).unwrap_or(false)) + } + + _ => false, + } +} + +/// Merge array cardinalities: looser cardinality wins. 
+/// +/// `+` ∪ `*` → `*` (because branch could be empty) +pub fn merge_array_cardinality(a_non_empty: bool, b_non_empty: bool) -> bool { + a_non_empty && b_non_empty +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn unify_void_void() { + let result = unify_flow(TypeFlow::Void, TypeFlow::Void); + assert!(matches!(result, Ok(TypeFlow::Void))); + } + + #[test] + fn unify_void_fields() { + let mut fields = BTreeMap::new(); + fields.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + + let result = unify_flow(TypeFlow::Void, TypeFlow::Fields(fields)).unwrap(); + + match result { + TypeFlow::Fields(f) => { + assert!(f.get("x").unwrap().optional); + } + _ => panic!("expected Fields"), + } + } + + #[test] + fn unify_fields_merge() { + let mut a = BTreeMap::new(); + a.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + + let mut b = BTreeMap::new(); + b.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + b.insert("y".to_string(), FieldInfo::required(TYPE_NODE)); + + let result = unify_flow(TypeFlow::Fields(a), TypeFlow::Fields(b)).unwrap(); + + match result { + TypeFlow::Fields(f) => { + // x is in both, so required + assert!(!f.get("x").unwrap().optional); + // y only in b, so optional + assert!(f.get("y").unwrap().optional); + } + _ => panic!("expected Fields"), + } + } + + #[test] + fn unify_scalar_error() { + let result = unify_flow(TypeFlow::Scalar(TYPE_NODE), TypeFlow::Void); + assert!(matches!(result, Err(UnifyError::ScalarInUntagged))); + } +} diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs new file mode 100644 index 00000000..7e964399 --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -0,0 +1,28 @@ +use crate::Query; +use indoc::indoc; + +#[test] +fn valid_type_inference() { + let input = indoc! {r#" + ... defs ... + + "Q = ... 
+ "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @""); +} + +#[test] +fn invalid_type_inference() { + let input = indoc! {r#" + ... defs ... + + "Q = ... + "#}; + + let res = Query::expect_invalid(input); + + insta::assert_snapshot!(res, @""); +} diff --git a/docs/type-system.md b/docs/type-system.md index 53b0ee01..b24f33a1 100644 --- a/docs/type-system.md +++ b/docs/type-system.md @@ -8,35 +8,9 @@ Plotnik prioritizes **predictability** and **structural clarity** over terseness Two principles guide the type system: -1. **Explicit structure**: Captures bubble up to the nearest scope boundary. To create nested output, you must explicitly capture a group (`{...} @name`). +1. **Flat structure**: Captures bubble up to the nearest scope boundary. -2. **Strict dimensionality**: Quantifiers (`*`, `+`) containing captures require an explicit row capture. This prevents parallel arrays where `a[i]` and `b[i]` lose their per-iteration association. - -### Why Strictness - -Permissive systems create surprises: - -``` -// Permissive: implicit parallel arrays -{ (key) @k (value) @v }* -→ { k: Node[], v: Node[] } // Are k[0] and v[0] related? Maybe... - -// Iteration 1: k="a", v="1" -// Iteration 2: k="b", v="2" -// Output: { k: ["a","b"], v: ["1","2"] } // Association lost in flat arrays -``` - -Plotnik's strict approach: - -``` -// Strict: explicit row structure -{ (key) @k (value) @v }* @pairs -→ { pairs: { k: Node, v: Node }[] } // Each pair is a distinct object - -// Output: { pairs: [{ k: "a", v: "1" }, { k: "b", v: "2" }] } -``` - -The explicit `@pairs` capture tells both the compiler and reader: "this is a list of structured rows." +2. **Strict dimensionality**: Quantifiers (`*`, `+`) containing captures require an explicit row capture. The alternative could be creating parallel arrays, but it's hard to maintain the per-iteration association for `a[i]` and `b[i]`. 
### Why Transparent Scoping @@ -77,7 +51,7 @@ This is the core rule that prevents association loss. Strict dimensionality applies **transitively through definitions**. Since definitions are transparent (captures bubble up), quantifying a definition that contains captures is equivalent to quantifying those captures directly: ``` -// Definition with capture +// Definition with captures Item = (pair (key) @k (value) @v) // These are equivalent after expansion: @@ -127,6 +101,8 @@ For node patterns with internal captures, wrap explicitly: → { params: { param: Node, name: string }[] } ``` +The strict rule forces you to think about structure upfront. + ### Optional Bubbling The `?` quantifier does **not** add dimensionality—it produces at most one value, not a list. Therefore, optional groups without captures are allowed: @@ -141,32 +117,6 @@ The `?` quantifier does **not** add dimensionality—it produces at most one val This lets optional fragments contribute fields directly to the parent struct without forcing an extra wrapper object. -### Why This Matters - -Consider extracting methods from classes: - -``` -// What we want: list of method objects -(class_declaration - body: (class_body - { (method_definition - name: (property_identifier) @name - parameters: (formal_parameters) @params - ) @method - }* @methods)) -→ { methods: { method: Node, name: Node, params: Node }[] } - -// Without strict dimensionality, you might write: -(class_declaration - body: (class_body - (method_definition - name: (property_identifier) @name - parameters: (formal_parameters) @params)*)) -→ { name: Node[], params: Node[] } // Parallel arrays—which name goes with which params? -``` - -The strict rule forces you to think about structure upfront. - ## 2. Scope Model ### Universal Bubbling @@ -187,6 +137,8 @@ New data structures are created only when explicitly requested: 2. **Captured Alternations**: `[...] @name` → Union 3. **Tagged Alternations**: `[ L: ... 
] @name` → Tagged Union +When quantifiers contain captures, the compiler forces you to create scope boundaries. + ## 3. Data Shapes ### Structs @@ -207,7 +159,7 @@ Created by `{ ... } @name`: Created by `[ ... ]`: - **Tagged**: `[ L1: (a) @a L2: (b) @b ]` → `{ "$tag": "L1", "$data": { a: Node } }` -- **Untagged**: `[ (a) @a (b) @b ]` → `{ a?: Node, b?: Node }` (merged) +- **Untagged**: `[ (a) @a (b) @b ]` → `{ a?: Node, b?: Node }` (merged 1-level deep) ### Enum Variants @@ -231,10 +183,10 @@ Quantifiers determine whether a field is singular, optional, or an array: | Pattern | Output Type | Meaning | | --------- | ---------------- | ------------ | -| `(x) @a` | `a: T` | exactly one | -| `(x)? @a` | `a?: T` | zero or one | -| `(x)* @a` | `a: T[]` | zero or more | -| `(x)+ @a` | `a: [T, ...T[]]` | one or more | +| `(A) @a` | `a: T` | exactly one | +| `(A)? @a` | `a?: T` | zero or one | +| `(A)* @a` | `a: T[]` | zero or more | +| `(A)+ @a` | `a: [T, ...T[]]` | one or more | ### Row Cardinality @@ -287,20 +239,22 @@ Shallow unification across untagged branches: ] // ERROR: String vs Node ``` +The choice of shallow unification is intentional. For more precision, users should use tagged unions. + ### Array Captures in Alternations -When a quantified capture appears in some branches but not others, the result is `Array | null`: +When a quantified capture appears in some branches but not others, the missing branch emits an empty array: ``` [ (a)+ @x (b) -] // x: Node[] | null +] // x: Node[] ``` -The missing branch emits `null`, not an empty array. This distinction matters: `null` means "branch didn't match" vs `[]` meaning "matched zero times." +Untagged alternations are "I don't care which branch matched"—so distinguishing "branch didn't match" from "matched zero times" is irrelevant. The empty array is easier to consume downstream.
-For type conflicts, use tagged alternations: +When types start to conflict, use tagged alternations: ``` [ @@ -321,9 +275,12 @@ For type conflicts, use tagged alternations: Top-level fields merge with optionality; nested mismatches are errors: ``` -// OK: top-level merge +// OK: top-level merge (scalars become optional) { x: Node, y: Node } ∪ { x: Node, z: String } → { x: Node, y?: Node, z?: String } +// OK: arrays emit [] when missing (not null) +{ items: Node[], x: Node } ∪ { x: Node } → { items: Node[], x: Node } + // OK: identical nested { data: { a: Node } } ∪ { data: { a: Node }, extra: Node } → { data: { a: Node }, extra?: Node } From 4f4f862f08a5579b87c85faa2394007cac14de82 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 13:41:17 -0300 Subject: [PATCH 02/18] Fixes --- .../src/query/dependencies_tests.rs | 49 ++ .../plotnik-lib/src/query/expr_arity_tests.rs | 12 + crates/plotnik-lib/src/query/query_tests.rs | 11 + .../plotnik-lib/src/query/type_check/infer.rs | 80 ++- .../plotnik-lib/src/query/type_check_tests.rs | 507 +++++++++++++++++- 5 files changed, 635 insertions(+), 24 deletions(-) diff --git a/crates/plotnik-lib/src/query/dependencies_tests.rs b/crates/plotnik-lib/src/query/dependencies_tests.rs index 73385b6f..ffded59a 100644 --- a/crates/plotnik-lib/src/query/dependencies_tests.rs +++ b/crates/plotnik-lib/src/query/dependencies_tests.rs @@ -42,6 +42,11 @@ fn invalid_unguarded_recursion_in_alternation() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | E = [(call) (E)] + | ^^^^^^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | E = [(call) (E)] @@ -206,6 +211,11 @@ fn invalid_diamond_dependency_recursion() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 
1 | A = (a [(B) (C)]) + | ^^^^^^^^^ + error: infinite recursion: cycle has no escape path | 1 | A = (a [(B) (C)]) @@ -343,6 +353,11 @@ fn invalid_direct_left_recursion_in_alternation() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | E = [(E) (x)] + | ^^^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | E = [(E) (x)] @@ -359,6 +374,11 @@ fn invalid_direct_right_recursion_in_alternation() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | E = [(x) (E)] + | ^^^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | E = [(x) (E)] @@ -393,6 +413,11 @@ fn invalid_unguarded_left_recursion_branch() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | A = [(A) 'escape'] + | ^^^^^^^^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | A = [(A) 'escape'] @@ -411,6 +436,11 @@ fn invalid_unguarded_left_recursion_with_wildcard_alt() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | A = [(A) _] + | ^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | A = [(A) _] @@ -429,6 +459,11 @@ fn invalid_unguarded_left_recursion_with_tree_alt() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | A = [(A) (leaf)] + | ^^^^^^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | A = [(A) (leaf)] @@ -502,6 +537,15 @@ fn invalid_simple_unguarded_recursion() { let res = 
Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | A = [ + | _____^ + 2 | | (foo) + 3 | | (A) + 4 | | ] + | |_^ + error: infinite recursion: cycle consumes no input | 3 | (A) @@ -521,6 +565,11 @@ fn invalid_unguarded_mutual_recursion_chain() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: incompatible types: scalar type in untagged alternation; use tagged alternation instead + | + 1 | A = [(B) (x)] + | ^^^^^^^^^ + error: infinite recursion: cycle consumes no input | 1 | A = [(B) (x)] diff --git a/crates/plotnik-lib/src/query/expr_arity_tests.rs b/crates/plotnik-lib/src/query/expr_arity_tests.rs index 687e330a..d4f02a58 100644 --- a/crates/plotnik-lib/src/query/expr_arity_tests.rs +++ b/crates/plotnik-lib/src/query/expr_arity_tests.rs @@ -178,6 +178,11 @@ fn field_with_seq_error() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: field `name` must match exactly one node, not a sequence + | + 1 | Q = (call name: {(a) (b)}) + | ^^^^^^^^^ + error: field `name` must match exactly one node, not a sequence | 1 | Q = (call name: {(a) (b)}) @@ -195,6 +200,13 @@ fn field_with_ref_to_seq_error() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" + error: field `name` must match exactly one node, not a sequence + | + 1 | X = {(a) (b)} + | --------- defined here + 2 | Q = (call name: (X)) + | ^^^ + error: field `name` must match exactly one node, not a sequence | 1 | X = {(a) (b)} diff --git a/crates/plotnik-lib/src/query/query_tests.rs b/crates/plotnik-lib/src/query/query_tests.rs index 4d3f3458..8e472b30 100644 --- a/crates/plotnik-lib/src/query/query_tests.rs +++ b/crates/plotnik-lib/src/query/query_tests.rs @@ -141,6 +141,17 @@ fn multifile_field_with_ref_to_seq_error() { assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" + error: 
field `name` must match exactly one node, not a sequence + --> main.ptk:1:17 + | + 1 | Q = (call name: (X)) + | ^^^ + | + ::: defs.ptk:1:5 + | + 1 | X = {(a) (b)} + | --------- defined here + error: field `name` must match exactly one node, not a sequence --> main.ptk:1:17 | diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 437ad327..52d44a34 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -74,13 +74,29 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } } - /// Named node: matches one position, produces nothing + /// Named node: matches one position, bubbles up child captures fn infer_named_node(&mut self, node: &NamedNode) -> TermInfo { - // Recursively infer children first + let mut merged_fields: BTreeMap = BTreeMap::new(); + for child in node.children() { - self.infer_expr(&child); + let child_info = self.infer_expr(&child); + + if let TypeFlow::Fields(fields) = child_info.flow { + for (name, info) in fields { + if !merged_fields.contains_key(&name) { + merged_fields.insert(name, info); + } + } + } } - TermInfo::new(Arity::One, TypeFlow::Void) + + let flow = if merged_fields.is_empty() { + TypeFlow::Void + } else { + TypeFlow::Fields(merged_fields) + }; + + TermInfo::new(Arity::One, flow) } /// Anonymous node (literal or wildcard): matches one position, produces nothing @@ -88,7 +104,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TermInfo::new(Arity::One, TypeFlow::Void) } - /// Reference: delegate arity, but refs are scope boundaries so produce Scalar(Ref) + /// Reference: transparent - propagate body's flow and arity fn infer_ref(&mut self, r: &Ref) -> TermInfo { let Some(name_tok) = r.name() else { return TermInfo::void(); @@ -101,12 +117,8 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { return TermInfo::void(); }; - // Infer the body to get its arity - let body_info = self.infer_expr(body); - - // Refs are scope boundaries - they 
produce a Scalar(Ref) regardless of what's inside - let ref_type = self.ctx.intern_type(TypeKind::Ref(name.to_string())); - TermInfo::new(body_info.arity, TypeFlow::Scalar(ref_type)) + // Refs are transparent - propagate body's flow and arity + self.infer_expr(body) } /// Sequence: One if 0-1 children, else Many; merge children's fields @@ -264,7 +276,21 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { ); }; - let inner_info = self.infer_expr(&inner); + // Special handling: if inner is a * or + quantifier, this capture serves as + // the row capture, so we skip strict dimensionality check + let inner_info = if let Expr::QuantifiedExpr(q) = &inner { + let quantifier = self.parse_quantifier(q); + if matches!( + quantifier, + QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore + ) { + self.infer_quantified_expr_as_row(q) + } else { + self.infer_expr(&inner) + } + } else { + self.infer_expr(&inner) + }; // Transform based on inner's flow let captured_type = match &inner_info.flow { @@ -294,6 +320,19 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { /// Quantified expression: applies quantifier to inner's flow fn infer_quantified_expr(&mut self, quant: &QuantifiedExpr) -> TermInfo { + self.infer_quantified_expr_impl(quant, false) + } + + /// Quantified expression when used as a row capture (skip strict dimensionality check) + fn infer_quantified_expr_as_row(&mut self, quant: &QuantifiedExpr) -> TermInfo { + self.infer_quantified_expr_impl(quant, true) + } + + fn infer_quantified_expr_impl( + &mut self, + quant: &QuantifiedExpr, + is_row_capture: bool, + ) -> TermInfo { let Some(inner) = quant.inner() else { return TermInfo::void(); }; @@ -322,11 +361,20 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => { - // * and + require strict dimensionality - self.check_strict_dimensionality(quant, &inner_info); + // * and + require strict dimensionality (unless this is a row capture) + if !is_row_capture { + 
self.check_strict_dimensionality(quant, &inner_info); + } let flow = match inner_info.flow { - TypeFlow::Void => TypeFlow::Void, + TypeFlow::Void => { + // Scalar list: void inner becomes array of Node + let array_type = self.ctx.intern_type(TypeKind::Array { + element: TYPE_NODE, + non_empty: quantifier.is_non_empty(), + }); + TypeFlow::Scalar(array_type) + } TypeFlow::Scalar(t) => { // Scalar becomes array let array_type = self.ctx.intern_type(TypeKind::Array { @@ -337,7 +385,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } TypeFlow::Fields(fields) => { // Fields with * or + and no row capture is an error - // (already reported by check_strict_dimensionality) + // (already reported by check_strict_dimensionality if !is_row_capture) // Return array of struct as best-effort let struct_type = self.ctx.intern_type(TypeKind::Struct(fields)); let array_type = self.ctx.intern_type(TypeKind::Array { diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs index 7e964399..94c20c51 100644 --- a/crates/plotnik-lib/src/query/type_check_tests.rs +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -1,28 +1,519 @@ use crate::Query; use indoc::indoc; +// ============================================================================= +// BASIC CAPTURES +// ============================================================================= + +#[test] +fn capture_single_node() { + let input = "Q = (identifier) @name"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: Node; + } + "); +} + +#[test] +fn capture_with_string_annotation() { + let input = "Q = (identifier) @name :: string"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: string; + } + "); +} + +#[test] +fn 
capture_with_custom_type() { + let input = "Q = (identifier) @name :: Identifier"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: Identifier; + } + "); +} + +// ============================================================================= +// NAMED NODE FLOW PROPAGATION (Bug #2) +// ============================================================================= + +#[test] +fn named_node_with_field_capture() { + // Child capture should bubble up through named node + let input = "Q = (function name: (identifier) @name)"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: Node; + } + "); +} + +#[test] +fn named_node_multiple_field_captures() { + let input = "Q = (function name: (identifier) @name body: (block) @body)"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + body: Node; + name: Node; + } + "); +} + +#[test] +fn nested_named_node_captures() { + let input = "Q = (call function: (member target: (identifier) @target))"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + target: Node; + } + "); +} + +// ============================================================================= +// SCALAR LISTS (Bug #1) +// ============================================================================= + +#[test] +fn scalar_list_zero_or_more() { + // No internal captures → scalar list: Node[] + let input = "Q = (decorator)* @decorators"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface 
Q { + decorators: Node[]; + } + "); +} + +#[test] +fn scalar_list_one_or_more() { + // No internal captures → non-empty array: [Node, ...Node[]] + let input = "Q = (identifier)+ @names"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + names: [Node, ...Node[]]; + } + "); +} + +// ============================================================================= +// ROW LISTS +// ============================================================================= + #[test] -fn valid_type_inference() { +fn row_list_basic() { let input = indoc! {r#" - ... defs ... + Q = {(key) @k (value) @v}* @rows + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + rows: Struct[]; + } - "Q = ... + export interface Struct { + k: Node; + v: Node; + } + "); +} + +#[test] +fn row_list_non_empty() { + let input = indoc! {r#" + Q = {(key) @k (value) @v}+ @rows "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + rows: [Struct, ...Struct[]]; + } + + export interface Struct { + k: Node; + v: Node; + } + "); +} + +// ============================================================================= +// OPTIONAL PATTERNS +// ============================================================================= +#[test] +fn optional_single_capture() { + let input = "Q = (decorator)? @dec"; let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } - insta::assert_snapshot!(res, @""); + export interface Q { + dec: Node; + } + "); } #[test] -fn invalid_type_inference() { +fn optional_group_bubbles_fields() { + // ? 
does NOT require row capture; fields bubble as optional + let input = indoc! {r#" + Q = {(modifier) @mod (decorator) @dec}? + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + dec?: Node; + mod?: Node; + } + "); +} + +// ============================================================================= +// SEQUENCES +// ============================================================================= + +#[test] +fn sequence_merges_fields() { let input = indoc! {r#" - ... defs ... + Q = {(a) @a (b) @b} + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } - "Q = ... + export interface Q { + a: Node; + b: Node; + } + "); +} + +#[test] +fn captured_sequence_creates_struct() { + let input = indoc! {r#" + Q = {(a) @a (b) @b} @row "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + export interface Q { + row: Struct; + } + + export interface Struct { + a: Node; + b: Node; + } + "); +} + +// ============================================================================= +// UNTAGGED ALTERNATIONS (merge style) +// ============================================================================= + +#[test] +fn untagged_alt_same_capture_all_branches() { + // Same capture in all branches → required field + let input = "Q = [(a) @x (b) @x]"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + x: Node; + } + "); +} + +#[test] +fn untagged_alt_different_captures() { + // Different captures → both optional + let input = "Q = [(a) @a (b) @b]"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + 
text: string; + } + + export interface Q { + a?: Node; + b?: Node; + } + "); +} + +#[test] +fn untagged_alt_partial_overlap() { + // Partial overlap → common required, others optional + let input = indoc! {r#" + Q = [ + {(a) @x (b) @y} + {(a) @x} + ] + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + x: Node; + y?: Node; + } + "); +} + +// ============================================================================= +// TAGGED ALTERNATIONS (Bug #3) +// ============================================================================= + +#[test] +fn tagged_alt_basic() { + let input = indoc! {r#" + Q = [Str: (string) @s Num: (number) @n] + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r#" + export interface Node { + kind: string; + text: string; + } + + export interface QNum { + $tag: "Num"; + $data: Struct2; + } + + export interface Struct2 { + n: Node; + } + + export interface QStr { + $tag: "Str"; + $data: Struct; + } + + export interface Struct { + s: Node; + } + + export type Q = QNum | QStr; + "#); +} + +#[test] +fn tagged_alt_with_type_annotation() { + let input = indoc! {r#" + Q = [Str: (string) @s ::string Num: (number) @n] + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r#" + export interface Node { + kind: string; + text: string; + } + + export interface QNum { + $tag: "Num"; + $data: Struct2; + } + + export interface Struct2 { + n: Node; + } + + export interface QStr { + $tag: "Str"; + $data: Struct; + } + + export interface Struct { + s: string; + } + + export type Q = QNum | QStr; + "#); +} + +#[test] +fn tagged_alt_captured() { + // Captured tagged alternation + let input = indoc! 
{r#" + Q = [Str: (string) @s Num: (number) @n] @result + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + result: Enum; + } + "); +} + +// ============================================================================= +// NESTED STRUCTURES +// ============================================================================= + +#[test] +fn nested_captured_group() { + let input = indoc! {r#" + Q = { + (identifier) @name + {(key) @k (value) @v} @pair + } + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Q { + name: Node; + pair: Struct; + } + + export interface Struct { + k: Node; + v: Node; + } + "); +} + +// ============================================================================= +// STRICT DIMENSIONALITY VIOLATIONS (errors) +// ============================================================================= + +#[test] +fn error_star_with_internal_captures_no_row() { + let input = indoc! {r#" + Bad = {(a) @a (b) @b}* + "#}; + let res = Query::expect_invalid(input); + insta::assert_snapshot!(res, @r" + error: quantifier `*` contains captures (`@a`, `@b`) but no row capture + | + 1 | Bad = {(a) @a (b) @b}* + | ^^^^^^^^^^^^^^^^ + | + help: wrap as `{...}* @rows` + "); +} + +#[test] +fn error_plus_with_internal_capture_no_row() { + let input = indoc! {r#" + Bad = {(c) @c}+ + "#}; let res = Query::expect_invalid(input); + insta::assert_snapshot!(res, @r" + error: quantifier `+` contains captures (`@c`) but no row capture + | + 1 | Bad = {(c) @c}+ + | ^^^^^^^^^ + | + help: wrap as `{...}* @rows` + "); +} - insta::assert_snapshot!(res, @""); +#[test] +fn error_named_node_with_capture_quantified() { + // (func (id) @name)* has internal capture + let input = indoc! 
{r#" + Bad = (func (identifier) @name)* + "#}; + let res = Query::expect_invalid(input); + insta::assert_snapshot!(res, @r" + error: quantifier `*` contains captures (`@name`) but no row capture + | + 1 | Bad = (func (identifier) @name)* + | ^^^^^^^^^^^^^^^^^^^^^^^^^^ + | + help: wrap as `{...}* @rows` + "); } From 9fb8683c065f6c6ac445dd3b727c6134e7739705 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 13:48:48 -0300 Subject: [PATCH 03/18] Fixes --- .../src/query/dependencies_tests.rs | 49 --------------- crates/plotnik-lib/src/query/link_tests.rs | 6 +- .../plotnik-lib/src/query/type_check/infer.rs | 13 ---- .../plotnik-lib/src/query/type_check/unify.rs | 60 ------------------- 4 files changed, 3 insertions(+), 125 deletions(-) diff --git a/crates/plotnik-lib/src/query/dependencies_tests.rs b/crates/plotnik-lib/src/query/dependencies_tests.rs index ffded59a..73385b6f 100644 --- a/crates/plotnik-lib/src/query/dependencies_tests.rs +++ b/crates/plotnik-lib/src/query/dependencies_tests.rs @@ -42,11 +42,6 @@ fn invalid_unguarded_recursion_in_alternation() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | E = [(call) (E)] - | ^^^^^^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | E = [(call) (E)] @@ -211,11 +206,6 @@ fn invalid_diamond_dependency_recursion() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | A = (a [(B) (C)]) - | ^^^^^^^^^ - error: infinite recursion: cycle has no escape path | 1 | A = (a [(B) (C)]) @@ -353,11 +343,6 @@ fn invalid_direct_left_recursion_in_alternation() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | E = [(E) 
(x)] - | ^^^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | E = [(E) (x)] @@ -374,11 +359,6 @@ fn invalid_direct_right_recursion_in_alternation() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | E = [(x) (E)] - | ^^^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | E = [(x) (E)] @@ -413,11 +393,6 @@ fn invalid_unguarded_left_recursion_branch() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | A = [(A) 'escape'] - | ^^^^^^^^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | A = [(A) 'escape'] @@ -436,11 +411,6 @@ fn invalid_unguarded_left_recursion_with_wildcard_alt() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | A = [(A) _] - | ^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | A = [(A) _] @@ -459,11 +429,6 @@ fn invalid_unguarded_left_recursion_with_tree_alt() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | A = [(A) (leaf)] - | ^^^^^^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | A = [(A) (leaf)] @@ -537,15 +502,6 @@ fn invalid_simple_unguarded_recursion() { let res = Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | A = [ - | _____^ - 2 | | (foo) - 3 | | (A) - 4 | | ] - | |_^ - error: infinite recursion: cycle consumes no input | 3 | (A) @@ -565,11 +521,6 @@ fn invalid_unguarded_mutual_recursion_chain() { let res = 
Query::expect_invalid(input); insta::assert_snapshot!(res, @r" - error: incompatible types: scalar type in untagged alternation; use tagged alternation instead - | - 1 | A = [(B) (x)] - | ^^^^^^^^^ - error: infinite recursion: cycle consumes no input | 1 | A = [(B) (x)] diff --git a/crates/plotnik-lib/src/query/link_tests.rs b/crates/plotnik-lib/src/query/link_tests.rs index 1599dfb1..7b98cce9 100644 --- a/crates/plotnik-lib/src/query/link_tests.rs +++ b/crates/plotnik-lib/src/query/link_tests.rs @@ -319,10 +319,10 @@ fn wildcard_node_skips_validation() { #[test] fn def_reference_with_link() { + // Test linking with definition reference as scalar list (no internal captures) let input = indoc! {r#" - Func = (function_declaration - name: (identifier) @name) @fn - Q = (program (Func)+) + Func = (function_declaration) + Q = (program (Func)+ @funcs) "#}; Query::expect_valid_linking(input, &LANG); diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 52d44a34..00ada5dd 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -535,19 +535,6 @@ impl Visitor for InferenceVisitor<'_, '_> { } } -/// Run inference on a single definition. -pub fn infer_definition( - ctx: &mut TypeContext, - symbol_table: &SymbolTable, - diag: &mut Diagnostics, - source_id: SourceId, - def_name: &str, -) -> Option { - let body = symbol_table.get(def_name)?; - let mut visitor = InferenceVisitor::new(ctx, symbol_table, diag, source_id); - Some(visitor.infer_expr(body)) -} - /// Run inference on all definitions in a root. 
pub fn infer_root( ctx: &mut TypeContext, diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs index 08382a61..5d2b4970 100644 --- a/crates/plotnik-lib/src/query/type_check/unify.rs +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -131,66 +131,6 @@ fn unify_type_ids(a: TypeId, b: TypeId, field: &str) -> Result bool { - match (a, b) { - (TypeKind::Void, TypeKind::Void) => true, - (TypeKind::Node, TypeKind::Node) => true, - (TypeKind::String, TypeKind::String) => true, - (TypeKind::Custom(a), TypeKind::Custom(b)) => a == b, - (TypeKind::Ref(a), TypeKind::Ref(b)) => a == b, - - (TypeKind::Optional(a), TypeKind::Optional(b)) => a == b, - - ( - TypeKind::Array { - element: a_elem, - non_empty: a_ne, - }, - TypeKind::Array { - element: b_elem, - non_empty: b_ne, - }, - ) => { - // Elements must match; looser cardinality wins (non_empty false wins) - a_elem == b_elem && (!a_ne || !b_ne || a_ne == b_ne) - } - - (TypeKind::Struct(a), TypeKind::Struct(b)) => { - // Must have identical field sets - if a.len() != b.len() { - return false; - } - a.iter().all(|(k, a_info)| { - b.get(k) - .map(|b_info| a_info.type_id == b_info.type_id) - .unwrap_or(false) - }) - } - - (TypeKind::Enum(a), TypeKind::Enum(b)) => { - // Must have identical variant sets - if a.len() != b.len() { - return false; - } - a.iter() - .all(|(k, a_ty)| b.get(k).map(|b_ty| a_ty == b_ty).unwrap_or(false)) - } - - _ => false, - } -} - -/// Merge array cardinalities: looser cardinality wins. 
-/// -/// `+` ∪ `*` → `*` (because branch could be empty) -pub fn merge_array_cardinality(a_non_empty: bool, b_non_empty: bool) -> bool { - a_non_empty && b_non_empty -} - #[cfg(test)] mod tests { use super::*; From 598acfcf3ae5cdb33622b9a02a17e5d55b756384 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 14:11:21 -0300 Subject: [PATCH 04/18] Fixes --- .../src/query/type_check/emit_ts.rs | 217 ++++++++++++++++-- .../plotnik-lib/src/query/type_check/infer.rs | 29 ++- .../plotnik-lib/src/query/type_check/unify.rs | 2 +- .../plotnik-lib/src/query/type_check_tests.rs | 109 +++------ 4 files changed, 250 insertions(+), 107 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index b1359407..f81415cf 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -3,11 +3,18 @@ //! Converts inferred types to TypeScript declarations. //! Used as a test oracle to verify type inference correctness. -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use super::context::TypeContext; use super::types::{FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeId, TypeKind}; +/// Naming context for synthetic type names: (DefName, FieldName) +#[derive(Clone, Debug)] +struct NamingContext { + def_name: String, + field_name: Option, +} + /// Configuration for TypeScript emission. 
#[derive(Clone, Debug)] pub struct EmitConfig { @@ -40,6 +47,10 @@ pub struct TsEmitter<'a> { used_names: BTreeSet, /// TypeId -> generated name mapping type_names: HashMap, + /// Custom type names that need `type X = Node` aliases + custom_types: BTreeSet, + /// Track which builtin types are referenced + referenced_builtins: HashSet, /// Output buffer output: String, } @@ -51,17 +62,22 @@ impl<'a> TsEmitter<'a> { config, used_names: BTreeSet::new(), type_names: HashMap::new(), + custom_types: BTreeSet::new(), + referenced_builtins: HashSet::new(), output: String::new(), } } /// Emit TypeScript for all definition types. pub fn emit(mut self) -> String { - // First pass: collect all types that need names - self.collect_type_names(); + // First pass: collect all types that need names with context + self.collect_type_names_with_context(); - // Emit Node type if configured - if self.config.emit_node_type { + // Second pass: collect referenced builtins and custom types + self.collect_references(); + + // Emit Node type if configured and actually used + if self.config.emit_node_type && self.referenced_builtins.contains(&TYPE_NODE) { self.emit_node_type(); } @@ -70,37 +86,171 @@ impl<'a> TsEmitter<'a> { self.emit_definition(name, type_id); } + // Emit custom type aliases + self.emit_custom_type_aliases(); + self.output } /// Emit TypeScript for a single definition. 
pub fn emit_single(mut self, name: &str, type_id: TypeId) -> String { - self.collect_type_names(); + self.collect_type_names_with_context(); + self.collect_references(); - if self.config.emit_node_type { + if self.config.emit_node_type && self.referenced_builtins.contains(&TYPE_NODE) { self.emit_node_type(); } self.emit_definition(name, type_id); + self.emit_custom_type_aliases(); self.output } - fn collect_type_names(&mut self) { + fn collect_type_names_with_context(&mut self) { // Reserve definition names first for (name, _) in self.ctx.iter_def_types() { let pascal_name = to_pascal_case(name); self.used_names.insert(pascal_name); } - // Then assign names to anonymous types + // Collect naming contexts by traversing definition types + let mut type_contexts: HashMap = HashMap::new(); + + for (def_name, type_id) in self.ctx.iter_def_types() { + self.collect_contexts_for_type( + type_id, + &NamingContext { + def_name: def_name.to_string(), + field_name: None, + }, + &mut type_contexts, + ); + } + + // Assign names using contexts for (id, kind) in self.ctx.iter_types() { if self.needs_named_type(kind) && !self.type_names.contains_key(&id) { - let name = self.generate_type_name(kind); + let name = if let Some(ctx) = type_contexts.get(&id) { + self.generate_contextual_name(ctx) + } else { + self.generate_type_name(kind) + }; self.type_names.insert(id, name); } } } + fn collect_contexts_for_type( + &self, + type_id: TypeId, + ctx: &NamingContext, + contexts: &mut HashMap, + ) { + if type_id.is_builtin() || contexts.contains_key(&type_id) { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Struct(fields) => { + // Only set context if this type needs a name + if !contexts.contains_key(&type_id) { + contexts.insert(type_id, ctx.clone()); + } + // Recurse into fields + for (field_name, info) in fields { + let field_ctx = NamingContext { + def_name: ctx.def_name.clone(), + field_name: Some(field_name.clone()), + 
}; + self.collect_contexts_for_type(info.type_id, &field_ctx, contexts); + } + } + TypeKind::Enum(variants) => { + if !contexts.contains_key(&type_id) { + contexts.insert(type_id, ctx.clone()); + } + // Don't recurse into variant types - they're inlined as $data + let _ = variants; + } + TypeKind::Array { element, .. } => { + self.collect_contexts_for_type(*element, ctx, contexts); + } + TypeKind::Optional(inner) => { + self.collect_contexts_for_type(*inner, ctx, contexts); + } + _ => {} + } + } + + fn generate_contextual_name(&mut self, ctx: &NamingContext) -> String { + let base = if let Some(field) = &ctx.field_name { + format!("{}{}", to_pascal_case(&ctx.def_name), to_pascal_case(field)) + } else { + to_pascal_case(&ctx.def_name) + }; + + self.unique_name(&base) + } + + fn collect_references(&mut self) { + for (_, type_id) in self.ctx.iter_def_types() { + self.collect_refs_in_type(type_id); + } + } + + fn collect_refs_in_type(&mut self, type_id: TypeId) { + if type_id == TYPE_NODE { + self.referenced_builtins.insert(TYPE_NODE); + return; + } + if type_id == TYPE_STRING { + self.referenced_builtins.insert(TYPE_STRING); + return; + } + if type_id == TYPE_VOID { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Node => { + self.referenced_builtins.insert(TYPE_NODE); + } + TypeKind::String => { + self.referenced_builtins.insert(TYPE_STRING); + } + TypeKind::Custom(name) => { + self.custom_types.insert(name.clone()); + // Custom types alias to Node + self.referenced_builtins.insert(TYPE_NODE); + } + TypeKind::Struct(fields) => { + for (_, info) in fields { + self.collect_refs_in_type(info.type_id); + } + } + TypeKind::Enum(variants) => { + for (_, vtype) in variants { + self.collect_refs_in_type(*vtype); + } + } + TypeKind::Array { element, .. 
} => { + self.collect_refs_in_type(*element); + } + TypeKind::Optional(inner) => { + self.collect_refs_in_type(*inner); + } + _ => {} + } + } + fn needs_named_type(&self, kind: &TypeKind) -> bool { matches!(kind, TypeKind::Struct(_) | TypeKind::Enum(_)) } @@ -173,6 +323,14 @@ impl<'a> TsEmitter<'a> { } } + fn emit_custom_type_aliases(&mut self) { + let export = if self.config.export { "export " } else { "" }; + for name in &self.custom_types.clone() { + self.output + .push_str(&format!("{}type {} = Node;\n\n", export, name)); + } + } + fn emit_interface(&mut self, name: &str, fields: &BTreeMap, export: &str) { self.output .push_str(&format!("{}interface {} {{\n", export, name)); @@ -199,13 +357,12 @@ impl<'a> TsEmitter<'a> { let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); variant_types.push(variant_type_name.clone()); - let data_type = self.type_to_ts(*type_id); + // Inline $data as struct literal instead of separate type + let data_str = self.inline_data_type(*type_id); self.output.push_str(&format!( "{}interface {} {{\n $tag: \"{}\";\n $data: {};\n}}\n\n", - export, variant_type_name, variant_name, data_type + export, variant_type_name, variant_name, data_str )); - - self.maybe_emit_nested_type(*type_id); } // Emit union type @@ -214,6 +371,19 @@ impl<'a> TsEmitter<'a> { .push_str(&format!("{}type {} = {};\n\n", export, name, union)); } + /// Inline a type as $data value (struct fields inlined, others as-is) + fn inline_data_type(&self, type_id: TypeId) -> String { + let Some(kind) = self.ctx.get_type(type_id) else { + return "unknown".to_string(); + }; + + match kind { + TypeKind::Struct(fields) => self.inline_struct(fields), + TypeKind::Void => "{}".to_string(), + _ => self.type_to_ts(type_id), + } + } + fn maybe_emit_nested_type(&mut self, type_id: TypeId) { let Some(kind) = self.ctx.get_type(type_id) else { return; @@ -232,6 +402,13 @@ impl<'a> TsEmitter<'a> { self.emit_interface(&name, fields, export); } } + 
TypeKind::Enum(variants) => { + if let Some(name) = self.type_names.get(&type_id) { + let name = name.clone(); + let export = if self.config.export { "export " } else { "" }; + self.emit_tagged_union(&name, variants, export); + } + } TypeKind::Array { element, .. } => { self.maybe_emit_nested_type(*element); } @@ -369,10 +546,20 @@ mod tests { } #[test] - fn emit_node_type() { + fn emit_node_type_only_when_referenced() { + // Empty context - Node should not be emitted let ctx = TypeContext::new(); let output = TsEmitter::new(&ctx, EmitConfig::default()).emit(); + assert!(!output.contains("interface Node")); + // Context with a definition using Node - should emit Node + let mut ctx = TypeContext::new(); + let mut fields = BTreeMap::new(); + fields.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + let struct_id = ctx.intern_type(TypeKind::Struct(fields)); + ctx.set_def_type("Q".to_string(), struct_id); + + let output = TsEmitter::new(&ctx, EmitConfig::default()).emit(); assert!(output.contains("interface Node")); assert!(output.contains("kind: string")); } diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 00ada5dd..de948788 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -276,20 +276,19 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { ); }; - // Special handling: if inner is a * or + quantifier, this capture serves as - // the row capture, so we skip strict dimensionality check - let inner_info = if let Expr::QuantifiedExpr(q) = &inner { + // Special handling for quantifiers: + // - * or +: this capture serves as row capture, skip strict dimensionality + // - ?: capture produces an optional field + let (inner_info, is_optional_capture) = if let Expr::QuantifiedExpr(q) = &inner { let quantifier = self.parse_quantifier(q); - if matches!( - quantifier, - QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore - ) { - 
self.infer_quantified_expr_as_row(q) - } else { - self.infer_expr(&inner) + match quantifier { + QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => { + (self.infer_quantified_expr_as_row(q), false) + } + QuantifierKind::Optional => (self.infer_expr(&inner), true), } } else { - self.infer_expr(&inner) + (self.infer_expr(&inner), false) }; // Transform based on inner's flow @@ -312,9 +311,15 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } }; + let field_info = if is_optional_capture { + FieldInfo::optional(captured_type) + } else { + FieldInfo::required(captured_type) + }; + TermInfo::new( inner_info.arity, - TypeFlow::single_field(capture_name, FieldInfo::required(captured_type)), + TypeFlow::single_field(capture_name, field_info), ) } diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs index 5d2b4970..2ab6421a 100644 --- a/crates/plotnik-lib/src/query/type_check/unify.rs +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -5,7 +5,7 @@ use std::collections::BTreeMap; -use super::types::{FieldInfo, TYPE_NODE, TypeFlow, TypeId, TypeKind}; +use super::types::{FieldInfo, TYPE_NODE, TypeFlow, TypeId}; /// Error during type unification. 
#[derive(Clone, Debug)] diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs index 94c20c51..0fd791ef 100644 --- a/crates/plotnik-lib/src/query/type_check_tests.rs +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -1,10 +1,6 @@ use crate::Query; use indoc::indoc; -// ============================================================================= -// BASIC CAPTURES -// ============================================================================= - #[test] fn capture_single_node() { let input = "Q = (identifier) @name"; @@ -26,11 +22,6 @@ fn capture_with_string_annotation() { let input = "Q = (identifier) @name :: string"; let res = Query::expect_valid_types(input); insta::assert_snapshot!(res, @r" - export interface Node { - kind: string; - text: string; - } - export interface Q { name: string; } @@ -50,13 +41,11 @@ fn capture_with_custom_type() { export interface Q { name: Identifier; } + + export type Identifier = Node; "); } -// ============================================================================= -// NAMED NODE FLOW PROPAGATION (Bug #2) -// ============================================================================= - #[test] fn named_node_with_field_capture() { // Child capture should bubble up through named node @@ -107,10 +96,6 @@ fn nested_named_node_captures() { "); } -// ============================================================================= -// SCALAR LISTS (Bug #1) -// ============================================================================= - #[test] fn scalar_list_zero_or_more() { // No internal captures → scalar list: Node[] @@ -145,10 +130,6 @@ fn scalar_list_one_or_more() { "); } -// ============================================================================= -// ROW LISTS -// ============================================================================= - #[test] fn row_list_basic() { let input = indoc! 
{r#" @@ -162,10 +143,10 @@ fn row_list_basic() { } export interface Q { - rows: Struct[]; + rows: QRows[]; } - export interface Struct { + export interface QRows { k: Node; v: Node; } @@ -185,20 +166,16 @@ fn row_list_non_empty() { } export interface Q { - rows: [Struct, ...Struct[]]; + rows: [QRows, ...QRows[]]; } - export interface Struct { + export interface QRows { k: Node; v: Node; } "); } -// ============================================================================= -// OPTIONAL PATTERNS -// ============================================================================= - #[test] fn optional_single_capture() { let input = "Q = (decorator)? @dec"; @@ -210,14 +187,13 @@ fn optional_single_capture() { } export interface Q { - dec: Node; + dec?: Node; } "); } #[test] fn optional_group_bubbles_fields() { - // ? does NOT require row capture; fields bubble as optional let input = indoc! {r#" Q = {(modifier) @mod (decorator) @dec}? "#}; @@ -235,10 +211,6 @@ fn optional_group_bubbles_fields() { "); } -// ============================================================================= -// SEQUENCES -// ============================================================================= - #[test] fn sequence_merges_fields() { let input = indoc! {r#" @@ -271,20 +243,16 @@ fn captured_sequence_creates_struct() { } export interface Q { - row: Struct; + row: QRow; } - export interface Struct { + export interface QRow { a: Node; b: Node; } "); } -// ============================================================================= -// UNTAGGED ALTERNATIONS (merge style) -// ============================================================================= - #[test] fn untagged_alt_same_capture_all_branches() { // Same capture in all branches → required field @@ -322,7 +290,6 @@ fn untagged_alt_different_captures() { #[test] fn untagged_alt_partial_overlap() { - // Partial overlap → common required, others optional let input = indoc! 
{r#" Q = [ {(a) @x (b) @y} @@ -343,10 +310,6 @@ fn untagged_alt_partial_overlap() { "); } -// ============================================================================= -// TAGGED ALTERNATIONS (Bug #3) -// ============================================================================= - #[test] fn tagged_alt_basic() { let input = indoc! {r#" @@ -361,20 +324,12 @@ fn tagged_alt_basic() { export interface QNum { $tag: "Num"; - $data: Struct2; - } - - export interface Struct2 { - n: Node; + $data: { n: Node }; } export interface QStr { $tag: "Str"; - $data: Struct; - } - - export interface Struct { - s: Node; + $data: { s: Node }; } export type Q = QNum | QStr; @@ -395,20 +350,12 @@ fn tagged_alt_with_type_annotation() { export interface QNum { $tag: "Num"; - $data: Struct2; - } - - export interface Struct2 { - n: Node; + $data: { n: Node }; } export interface QStr { $tag: "Str"; - $data: Struct; - } - - export interface Struct { - s: string; + $data: { s: string }; } export type Q = QNum | QStr; @@ -422,21 +369,29 @@ fn tagged_alt_captured() { Q = [Str: (string) @s Num: (number) @n] @result "#}; let res = Query::expect_valid_types(input); - insta::assert_snapshot!(res, @r" + insta::assert_snapshot!(res, @r#" export interface Node { kind: string; text: string; } export interface Q { - result: Enum; + result: QResult; } - "); -} -// ============================================================================= -// NESTED STRUCTURES -// ============================================================================= + export interface QResultNum { + $tag: "Num"; + $data: { n: Node }; + } + + export interface QResultStr { + $tag: "Str"; + $data: { s: Node }; + } + + export type QResult = QResultNum | QResultStr; + "#); +} #[test] fn nested_captured_group() { @@ -455,20 +410,16 @@ fn nested_captured_group() { export interface Q { name: Node; - pair: Struct; + pair: QPair; } - export interface Struct { + export interface QPair { k: Node; v: Node; } "); } -// 
============================================================================= -// STRICT DIMENSIONALITY VIOLATIONS (errors) -// ============================================================================= - #[test] fn error_star_with_internal_captures_no_row() { let input = indoc! {r#" From 74ffe69adf68662aabe3dcdefdc409fc563acc22 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 14:53:50 -0300 Subject: [PATCH 05/18] Fixes --- .../src/query/type_check/context.rs | 98 +++++- .../src/query/type_check/emit_ts.rs | 310 ++++++++++++++---- .../plotnik-lib/src/query/type_check/infer.rs | 48 +-- .../plotnik-lib/src/query/type_check/mod.rs | 12 +- .../src/query/type_check/symbol.rs | 160 +++++++++ .../plotnik-lib/src/query/type_check/types.rs | 32 +- .../plotnik-lib/src/query/type_check/unify.rs | 55 ++-- .../plotnik-lib/src/query/type_check_tests.rs | 46 +-- 8 files changed, 598 insertions(+), 163 deletions(-) create mode 100644 crates/plotnik-lib/src/query/type_check/symbol.rs diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 917c65f3..1cf191e9 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -1,17 +1,21 @@ -//! TypeContext: manages interned types and term info cache. +//! TypeContext: manages interned types, symbols, and term info cache. //! //! Types are interned to enable cheap equality checks and cycle handling. +//! Symbols are interned to enable cheap string comparison. //! TermInfo is cached per-expression to avoid recomputation. use std::collections::HashMap; use crate::parser::ast::Expr; +use super::symbol::{Interner, Symbol}; use super::types::{Arity, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind}; -/// Central registry for types and expression metadata. +/// Central registry for types, symbols, and expression metadata. 
#[derive(Debug, Clone)] pub struct TypeContext { + /// String interner for field/type names + interner: Interner, /// Interned types by ID types: Vec, /// Deduplication map for type interning @@ -19,7 +23,7 @@ pub struct TypeContext { /// Cached term info per expression term_info: HashMap, /// Definition-level type info (for TypeScript emission) - def_types: HashMap, + def_types: HashMap, } impl Default for TypeContext { @@ -31,6 +35,7 @@ impl Default for TypeContext { impl TypeContext { pub fn new() -> Self { let mut ctx = Self { + interner: Interner::new(), types: Vec::new(), type_map: HashMap::new(), term_info: HashMap::new(), @@ -50,6 +55,34 @@ impl TypeContext { ctx } + // ========== Symbol interning ========== + + /// Intern a string, returning its Symbol. + #[inline] + pub fn intern(&mut self, s: &str) -> Symbol { + self.interner.intern(s) + } + + /// Intern an owned string. + #[inline] + pub fn intern_owned(&mut self, s: String) -> Symbol { + self.interner.intern_owned(s) + } + + /// Resolve a Symbol back to its string. + #[inline] + pub fn resolve(&self, sym: Symbol) -> &str { + self.interner.resolve(sym) + } + + /// Get a reference to the interner (for emission, etc.). + #[inline] + pub fn interner(&self) -> &Interner { + &self.interner + } + + // ========== Type interning ========== + /// Intern a type, returning its ID. Deduplicates identical types. pub fn intern_type(&mut self, kind: TypeKind) -> TypeId { if let Some(&id) = self.type_map.get(&kind) { @@ -73,6 +106,8 @@ impl TypeContext { (id, &self.types[id.0 as usize]) } + // ========== Term info cache ========== + /// Cache term info for an expression. pub fn set_term_info(&mut self, expr: Expr, info: TermInfo) { self.term_info.insert(expr, info); @@ -83,14 +118,33 @@ impl TypeContext { self.term_info.get(expr) } + // ========== Definition types ========== + /// Register the output type for a definition. 
- pub fn set_def_type(&mut self, name: String, type_id: TypeId) { + pub fn set_def_type(&mut self, name: Symbol, type_id: TypeId) { self.def_types.insert(name, type_id); } + /// Register the output type for a definition by string name. + pub fn set_def_type_by_name(&mut self, name: &str, type_id: TypeId) { + let sym = self.interner.intern(name); + self.def_types.insert(sym, type_id); + } + /// Get the output type for a definition. - pub fn get_def_type(&self, name: &str) -> Option { - self.def_types.get(name).copied() + pub fn get_def_type(&self, name: Symbol) -> Option { + self.def_types.get(&name).copied() + } + + /// Get the output type for a definition by string name. + pub fn get_def_type_by_name(&self, name: &str) -> Option { + // Linear scan since we don't have reverse lookup without interning + for (&sym, &type_id) in &self.def_types { + if self.interner.resolve(sym) == name { + return Some(type_id); + } + } + None } /// Get arity for an expression (for backward compatibility with expr_arity). @@ -98,6 +152,8 @@ impl TypeContext { self.term_info.get(expr).map(|info| info.arity) } + // ========== Iteration ========== + /// Iterate over all interned types. pub fn iter_types(&self) -> impl Iterator { self.types @@ -112,8 +168,8 @@ impl TypeContext { } /// Iterate over all definition types. 
- pub fn iter_def_types(&self) -> impl Iterator { - self.def_types.iter().map(|(k, v)| (k.as_str(), *v)) + pub fn iter_def_types(&self) -> impl Iterator + '_ { + self.def_types.iter().map(|(&sym, &type_id)| (sym, type_id)) } } @@ -148,12 +204,36 @@ mod tests { fn struct_types_intern_correctly() { let mut ctx = TypeContext::new(); + let x_sym = ctx.intern("x"); let mut fields = BTreeMap::new(); - fields.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + fields.insert(x_sym, FieldInfo::required(TYPE_NODE)); let id1 = ctx.intern_type(TypeKind::Struct(fields.clone())); let id2 = ctx.intern_type(TypeKind::Struct(fields)); assert_eq!(id1, id2); } + + #[test] + fn symbol_interning_works() { + let mut ctx = TypeContext::new(); + + let a = ctx.intern("foo"); + let b = ctx.intern("foo"); + let c = ctx.intern("bar"); + + assert_eq!(a, b); + assert_ne!(a, c); + assert_eq!(ctx.resolve(a), "foo"); + assert_eq!(ctx.resolve(c), "bar"); + } + + #[test] + fn def_type_by_name() { + let mut ctx = TypeContext::new(); + + ctx.set_def_type_by_name("Query", TYPE_NODE); + assert_eq!(ctx.get_def_type_by_name("Query"), Some(TYPE_NODE)); + assert_eq!(ctx.get_def_type_by_name("Missing"), None); + } } diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index f81415cf..0652430d 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -6,6 +6,7 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use super::context::TypeContext; +use super::symbol::Symbol; use super::types::{FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeId, TypeKind}; /// Naming context for synthetic type names: (DefName, FieldName) @@ -47,10 +48,11 @@ pub struct TsEmitter<'a> { used_names: BTreeSet, /// TypeId -> generated name mapping type_names: HashMap, - /// Custom type names that need `type X = Node` aliases - custom_types: BTreeSet, + /// Track which builtin types are 
referenced referenced_builtins: HashSet, + /// Track which types have been emitted + emitted: HashSet, /// Output buffer output: String, } @@ -62,8 +64,8 @@ impl<'a> TsEmitter<'a> { config, used_names: BTreeSet::new(), type_names: HashMap::new(), - custom_types: BTreeSet::new(), referenced_builtins: HashSet::new(), + emitted: HashSet::new(), output: String::new(), } } @@ -81,13 +83,26 @@ impl<'a> TsEmitter<'a> { self.emit_node_type(); } - // Emit each definition type - for (name, type_id) in self.ctx.iter_def_types() { - self.emit_definition(name, type_id); - } + // Collect definition names for lookup + let def_names: HashMap = self + .ctx + .iter_def_types() + .map(|(sym, id)| (id, self.ctx.resolve(sym).to_string())) + .collect(); - // Emit custom type aliases - self.emit_custom_type_aliases(); + // Compute topological order (leaves first) + let sorted = self.topological_sort(); + + // Emit types in topological order + for type_id in sorted { + if let Some(def_name) = def_names.get(&type_id) { + self.emit_definition(def_name, type_id); + } else if let Some(name) = self.type_names.get(&type_id).cloned() { + self.emit_nested_type(type_id, &name); + } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(type_id) { + self.emit_custom_type_alias(self.ctx.resolve(*sym)); + } + } self.output } @@ -101,14 +116,169 @@ impl<'a> TsEmitter<'a> { self.emit_node_type(); } + // Compute topological order for types reachable from this definition + let sorted = self.topological_sort_from(type_id); + + // Emit nested types first (in dependency order) + for nested_id in &sorted { + if *nested_id != type_id { + if let Some(nested_name) = self.type_names.get(nested_id).cloned() { + self.emit_nested_type(*nested_id, &nested_name); + } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(*nested_id) { + self.emit_custom_type_alias(self.ctx.resolve(*sym)); + } + } + } + + // Emit the main definition last self.emit_definition(name, type_id); - self.emit_custom_type_aliases(); 
self.output } + /// Compute topological sort of all types (leaves first). + fn topological_sort(&self) -> Vec { + // Collect all types that need emission + let mut to_emit: HashSet = HashSet::new(); + for (_, type_id) in self.ctx.iter_def_types() { + self.collect_emittable_types(type_id, &mut to_emit); + } + + self.topo_sort_set(&to_emit) + } + + /// Compute topological sort starting from a single type. + fn topological_sort_from(&self, root: TypeId) -> Vec { + let mut to_emit: HashSet = HashSet::new(); + self.collect_emittable_types(root, &mut to_emit); + self.topo_sort_set(&to_emit) + } + + /// Collect all types reachable from `type_id` that need emission. + fn collect_emittable_types(&self, type_id: TypeId, out: &mut HashSet) { + if type_id.is_builtin() || out.contains(&type_id) { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Struct(fields) => { + out.insert(type_id); + for info in fields.values() { + self.collect_emittable_types(info.type_id, out); + } + } + TypeKind::Enum(_) => { + // Variant payload structs are inlined in $data by emit_tagged_union, + // so don't collect them for separate emission + out.insert(type_id); + } + TypeKind::Array { element, .. } => { + self.collect_emittable_types(*element, out); + } + TypeKind::Optional(inner) => { + self.collect_emittable_types(*inner, out); + } + TypeKind::Custom(_) => { + out.insert(type_id); + } + _ => {} + } + } + + /// Topologically sort a set of types (leaves first via Kahn's algorithm). 
+ fn topo_sort_set(&self, types: &HashSet) -> Vec { + // Build adjacency: type -> types it depends on (within the set) + let mut deps: HashMap> = HashMap::new(); + let mut rdeps: HashMap> = HashMap::new(); + + for &tid in types { + deps.entry(tid).or_default(); + rdeps.entry(tid).or_default(); + } + + for &tid in types { + for dep in self.direct_deps(tid) { + if types.contains(&dep) && dep != tid { + deps.entry(tid).or_default().insert(dep); + rdeps.entry(dep).or_default().insert(tid); + } + } + } + + // Kahn's algorithm: start with nodes that have no dependencies + let mut result = Vec::with_capacity(types.len()); + let mut queue: Vec = deps + .iter() + .filter(|(_, d)| d.is_empty()) + .map(|(&tid, _)| tid) + .collect(); + + // Sort for deterministic output + queue.sort_by_key(|tid| tid.0); + + while let Some(tid) = queue.pop() { + result.push(tid); + if let Some(dependents) = rdeps.get(&tid) { + for &dependent in dependents { + if let Some(dep_set) = deps.get_mut(&dependent) { + dep_set.remove(&tid); + if dep_set.is_empty() { + queue.push(dependent); + queue.sort_by_key(|t| t.0); + } + } + } + } + } + + result + } + + /// Get direct type dependencies of a type (non-recursive). + /// Unwraps Array/Optional to find actual emittable dependencies. + fn direct_deps(&self, type_id: TypeId) -> Vec { + let Some(kind) = self.ctx.get_type(type_id) else { + return vec![]; + }; + + match kind { + TypeKind::Struct(fields) => fields + .values() + .flat_map(|info| self.unwrap_to_emittable(info.type_id)) + .collect(), + TypeKind::Enum(variants) => variants + .values() + .flat_map(|&tid| self.unwrap_to_emittable(tid)) + .collect(), + TypeKind::Array { element, .. } => self.unwrap_to_emittable(*element), + TypeKind::Optional(inner) => self.unwrap_to_emittable(*inner), + _ => vec![], + } + } + + /// Unwrap Array/Optional wrappers to find the underlying emittable type. 
+ fn unwrap_to_emittable(&self, type_id: TypeId) -> Vec { + if type_id.is_builtin() { + return vec![]; + } + let Some(kind) = self.ctx.get_type(type_id) else { + return vec![]; + }; + match kind { + TypeKind::Array { element, .. } => self.unwrap_to_emittable(*element), + TypeKind::Optional(inner) => self.unwrap_to_emittable(*inner), + TypeKind::Struct(_) | TypeKind::Enum(_) | TypeKind::Custom(_) => vec![type_id], + _ => vec![], + } + } + fn collect_type_names_with_context(&mut self) { // Reserve definition names first - for (name, _) in self.ctx.iter_def_types() { + for (sym, _) in self.ctx.iter_def_types() { + let name = self.ctx.resolve(sym); let pascal_name = to_pascal_case(name); self.used_names.insert(pascal_name); } @@ -116,7 +286,8 @@ impl<'a> TsEmitter<'a> { // Collect naming contexts by traversing definition types let mut type_contexts: HashMap = HashMap::new(); - for (def_name, type_id) in self.ctx.iter_def_types() { + for (sym, type_id) in self.ctx.iter_def_types() { + let def_name = self.ctx.resolve(sym); self.collect_contexts_for_type( type_id, &NamingContext { @@ -161,10 +332,11 @@ impl<'a> TsEmitter<'a> { contexts.insert(type_id, ctx.clone()); } // Recurse into fields - for (field_name, info) in fields { + for (&field_sym, info) in fields { + let field_name = self.ctx.resolve(field_sym); let field_ctx = NamingContext { def_name: ctx.def_name.clone(), - field_name: Some(field_name.clone()), + field_name: Some(field_name.to_string()), }; self.collect_contexts_for_type(info.type_id, &field_ctx, contexts); } @@ -226,19 +398,18 @@ impl<'a> TsEmitter<'a> { TypeKind::String => { self.referenced_builtins.insert(TYPE_STRING); } - TypeKind::Custom(name) => { - self.custom_types.insert(name.clone()); + TypeKind::Custom(_) => { // Custom types alias to Node self.referenced_builtins.insert(TYPE_NODE); } TypeKind::Struct(fields) => { - for (_, info) in fields { + for info in fields.values() { self.collect_refs_in_type(info.type_id); } } TypeKind::Enum(variants) => 
{ - for (_, vtype) in variants { - self.collect_refs_in_type(*vtype); + for &vtype in variants.values() { + self.collect_refs_in_type(vtype); } } TypeKind::Array { element, .. } => { @@ -323,19 +494,22 @@ impl<'a> TsEmitter<'a> { } } - fn emit_custom_type_aliases(&mut self) { + fn emit_custom_type_alias(&mut self, name: &str) { let export = if self.config.export { "export " } else { "" }; - for name in &self.custom_types.clone() { - self.output - .push_str(&format!("{}type {} = Node;\n\n", export, name)); - } + self.output + .push_str(&format!("{}type {} = Node;\n\n", export, name)); } - fn emit_interface(&mut self, name: &str, fields: &BTreeMap, export: &str) { + fn emit_interface(&mut self, name: &str, fields: &BTreeMap, export: &str) { self.output .push_str(&format!("{}interface {} {{\n", export, name)); - for (field_name, info) in fields { + // Sort fields by resolved name for deterministic output + let mut sorted_fields: Vec<_> = fields.iter().collect(); + sorted_fields.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + + for (&field_sym, info) in sorted_fields { + let field_name = self.ctx.resolve(field_sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" 
} else { "" }; self.output @@ -343,29 +517,27 @@ impl<'a> TsEmitter<'a> { } self.output.push_str("}\n\n"); - - // Emit nested types - for (_, info) in fields { - self.maybe_emit_nested_type(info.type_id); - } } - fn emit_tagged_union(&mut self, name: &str, variants: &BTreeMap, export: &str) { - // Emit variant types first + fn emit_tagged_union(&mut self, name: &str, variants: &BTreeMap, export: &str) { let mut variant_types = Vec::new(); - for (variant_name, type_id) in variants { + + // Sort variants by resolved name for deterministic output + let mut sorted_variants: Vec<_> = variants.iter().collect(); + sorted_variants.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + + for (&variant_sym, &type_id) in sorted_variants { + let variant_name = self.ctx.resolve(variant_sym); let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); variant_types.push(variant_type_name.clone()); - // Inline $data as struct literal instead of separate type - let data_str = self.inline_data_type(*type_id); + let data_str = self.inline_data_type(type_id); self.output.push_str(&format!( "{}interface {} {{\n $tag: \"{}\";\n $data: {};\n}}\n\n", export, variant_type_name, variant_name, data_str )); } - // Emit union type let union = variant_types.join(" | "); self.output .push_str(&format!("{}type {} = {};\n\n", export, name, union)); @@ -384,36 +556,25 @@ impl<'a> TsEmitter<'a> { } } - fn maybe_emit_nested_type(&mut self, type_id: TypeId) { + /// Emit a nested type by its generated name. 
+ fn emit_nested_type(&mut self, type_id: TypeId, name: &str) { + if self.emitted.contains(&type_id) || type_id.is_builtin() { + return; + } + self.emitted.insert(type_id); + let Some(kind) = self.ctx.get_type(type_id) else { return; }; - // Skip if already emitted or is a primitive - if type_id.is_builtin() { - return; - } + let export = if self.config.export { "export " } else { "" }; match kind { TypeKind::Struct(fields) => { - if let Some(name) = self.type_names.get(&type_id) { - let name = name.clone(); - let export = if self.config.export { "export " } else { "" }; - self.emit_interface(&name, fields, export); - } + self.emit_interface(name, fields, export); } TypeKind::Enum(variants) => { - if let Some(name) = self.type_names.get(&type_id) { - let name = name.clone(); - let export = if self.config.export { "export " } else { "" }; - self.emit_tagged_union(&name, variants, export); - } - } - TypeKind::Array { element, .. } => { - self.maybe_emit_nested_type(*element); - } - TypeKind::Optional(inner) => { - self.maybe_emit_nested_type(*inner); + self.emit_tagged_union(name, variants, export); } _ => {} } @@ -438,8 +599,8 @@ impl<'a> TsEmitter<'a> { TypeKind::Void => "void".to_string(), TypeKind::Node => "Node".to_string(), TypeKind::String => "string".to_string(), - TypeKind::Custom(name) => name.clone(), - TypeKind::Ref(name) => to_pascal_case(name), + TypeKind::Custom(sym) => self.ctx.resolve(*sym).to_string(), + TypeKind::Ref(sym) => to_pascal_case(self.ctx.resolve(*sym)), TypeKind::Struct(fields) => { if let Some(name) = self.type_names.get(&type_id) { @@ -473,14 +634,19 @@ impl<'a> TsEmitter<'a> { } } - fn inline_struct(&self, fields: &BTreeMap) -> String { + fn inline_struct(&self, fields: &BTreeMap) -> String { if fields.is_empty() { return "{}".to_string(); } - let field_strs: Vec<_> = fields + // Sort fields by resolved name for deterministic output + let mut sorted_fields: Vec<_> = fields.iter().collect(); + sorted_fields.sort_by_key(|&(&sym, _)| 
self.ctx.resolve(sym)); + + let field_strs: Vec<_> = sorted_fields .iter() - .map(|(name, info)| { + .map(|&(&sym, ref info)| { + let name = self.ctx.resolve(sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" } else { "" }; format!("{}{}: {}", name, optional, ts_type) @@ -490,11 +656,16 @@ impl<'a> TsEmitter<'a> { format!("{{ {} }}", field_strs.join("; ")) } - fn inline_enum(&self, variants: &BTreeMap) -> String { - let variant_strs: Vec<_> = variants + fn inline_enum(&self, variants: &BTreeMap) -> String { + // Sort variants by resolved name for deterministic output + let mut sorted_variants: Vec<_> = variants.iter().collect(); + sorted_variants.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + + let variant_strs: Vec<_> = sorted_variants .iter() - .map(|(name, type_id)| { - let data_type = self.type_to_ts(*type_id); + .map(|&(&sym, &type_id)| { + let name = self.ctx.resolve(sym); + let data_type = self.type_to_ts(type_id); format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type) }) .collect(); @@ -554,10 +725,11 @@ mod tests { // Context with a definition using Node - should emit Node let mut ctx = TypeContext::new(); + let x_sym = ctx.intern("x"); let mut fields = BTreeMap::new(); - fields.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + fields.insert(x_sym, FieldInfo::required(TYPE_NODE)); let struct_id = ctx.intern_type(TypeKind::Struct(fields)); - ctx.set_def_type("Q".to_string(), struct_id); + ctx.set_def_type_by_name("Q", struct_id); let output = TsEmitter::new(&ctx, EmitConfig::default()).emit(); assert!(output.contains("interface Node")); diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index de948788..c8355cd8 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -5,6 +5,8 @@ use std::collections::BTreeMap; +use super::symbol::Symbol; + use rowan::TextRange; use 
crate::diagnostics::{DiagnosticKind, Diagnostics}; @@ -76,7 +78,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { /// Named node: matches one position, bubbles up child captures fn infer_named_node(&mut self, node: &NamedNode) -> TermInfo { - let mut merged_fields: BTreeMap = BTreeMap::new(); + let mut merged_fields: BTreeMap = BTreeMap::new(); for child in node.children() { let child_info = self.infer_expr(&child); @@ -135,7 +137,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { }; // Merge fields from all children - let mut merged_fields: BTreeMap = BTreeMap::new(); + let mut merged_fields: BTreeMap = BTreeMap::new(); for child in &children { let child_info = self.infer_expr(child); @@ -150,7 +152,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { DiagnosticKind::DuplicateCaptureInScope, child.text_range(), ) - .message(&name) + .message(self.ctx.resolve(name)) .emit(); } else { merged_fields.insert(name, info); @@ -181,19 +183,19 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } fn infer_tagged_alt(&mut self, alt: &AltExpr) -> TermInfo { - let mut variants: BTreeMap = BTreeMap::new(); + let mut variants: BTreeMap = BTreeMap::new(); let mut combined_arity = Arity::One; for branch in alt.branches() { let Some(label) = branch.label() else { continue; }; - let label_text = label.text().to_string(); + let label_sym = self.ctx.intern(label.text()); let Some(body) = branch.body() else { // Empty variant gets void/empty struct type variants.insert( - label_text, + label_sym, self.ctx.intern_type(TypeKind::Struct(BTreeMap::new())), ); continue; @@ -204,7 +206,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { // Convert flow to a type for this variant let variant_type = self.flow_to_type(&body_info.flow); - variants.insert(label_text, variant_type); + variants.insert(label_sym, variant_type); } // Tagged alternation produces an Enum type @@ -252,7 +254,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { .map(|inner| self.infer_expr(&inner)) .unwrap_or_else(TermInfo::void); }; - let capture_name = 
name_tok.text().to_string(); + let capture_name = self.ctx.intern(name_tok.text()); // Check for type annotation let annotation_type = cap.type_annotation().and_then(|t| { @@ -261,8 +263,8 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { if type_name == "string" { TYPE_STRING } else { - self.ctx - .intern_type(TypeKind::Custom(type_name.to_string())) + let type_sym = self.ctx.intern(type_name); + self.ctx.intern_type(TypeKind::Custom(type_sym)) } }) }); @@ -453,7 +455,10 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { .map(|t| t.text().to_string()) .unwrap_or_else(|| "*".to_string()); - let capture_names: Vec<_> = fields.keys().map(|s| format!("`@{}`", s)).collect(); + let capture_names: Vec<_> = fields + .keys() + .map(|s| format!("`@{}`", self.ctx.resolve(*s))) + .collect(); let captures_str = capture_names.join(", "); self.diag @@ -499,15 +504,18 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { DiagnosticKind::IncompatibleTypes, "scalar type in untagged alternation; use tagged alternation instead".to_string(), ), - UnifyError::IncompatibleTypes { field } => { - (DiagnosticKind::IncompatibleCaptureTypes, field.clone()) - } - UnifyError::IncompatibleStructs { field } => { - (DiagnosticKind::IncompatibleStructShapes, field.clone()) - } - UnifyError::IncompatibleArrayElements { field } => { - (DiagnosticKind::IncompatibleCaptureTypes, field.clone()) - } + UnifyError::IncompatibleTypes { field } => ( + DiagnosticKind::IncompatibleCaptureTypes, + self.ctx.resolve(*field).to_string(), + ), + UnifyError::IncompatibleStructs { field } => ( + DiagnosticKind::IncompatibleStructShapes, + self.ctx.resolve(*field).to_string(), + ), + UnifyError::IncompatibleArrayElements { field } => ( + DiagnosticKind::IncompatibleCaptureTypes, + self.ctx.resolve(*field).to_string(), + ), }; self.diag diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index 85d5bcaa..72cde7ca 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ 
b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -8,17 +8,21 @@ mod context; mod emit_ts; mod infer; +mod symbol; mod types; mod unify; pub use context::TypeContext; pub use emit_ts::{EmitConfig, TsEmitter, emit_typescript, emit_typescript_with_config}; +pub use symbol::{Interner, Symbol}; pub use types::{ Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeFlow, TypeId, TypeKind, }; pub use unify::{UnifyError, unify_flow, unify_flows}; +use std::collections::BTreeMap; + use indexmap::IndexMap; use crate::diagnostics::Diagnostics; @@ -60,7 +64,7 @@ pub fn infer_types( if let Some(body) = symbol_table.get(def_name) { if let Some(info) = ctx.get_term_info(body).cloned() { let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type(def_name.to_string(), type_id); + ctx.set_def_type_by_name(def_name, type_id); } } } @@ -68,7 +72,7 @@ pub fn infer_types( // Handle any definitions not in an SCC (shouldn't happen, but be safe) for (name, source_id, _body) in symbol_table.iter_full() { - if ctx.get_def_type(name).is_some() { + if ctx.get_def_type_by_name(name).is_some() { continue; } @@ -81,7 +85,7 @@ pub fn infer_types( if let Some(body) = symbol_table.get(name) { if let Some(info) = ctx.get_term_info(body).cloned() { let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type(name.to_string(), type_id); + ctx.set_def_type_by_name(name, type_id); } } } @@ -92,7 +96,7 @@ pub fn infer_types( /// Convert a TypeFlow to a TypeId for storage. 
fn flow_to_type_id(ctx: &mut TypeContext, flow: &TypeFlow) -> TypeId { match flow { - TypeFlow::Void => ctx.intern_type(TypeKind::Struct(std::collections::BTreeMap::new())), + TypeFlow::Void => ctx.intern_type(TypeKind::Struct(BTreeMap::new())), TypeFlow::Scalar(type_id) => *type_id, TypeFlow::Fields(fields) => ctx.intern_type(TypeKind::Struct(fields.clone())), } diff --git a/crates/plotnik-lib/src/query/type_check/symbol.rs b/crates/plotnik-lib/src/query/type_check/symbol.rs new file mode 100644 index 00000000..bb2116ef --- /dev/null +++ b/crates/plotnik-lib/src/query/type_check/symbol.rs @@ -0,0 +1,160 @@ +//! Symbol interning for field and type names. +//! +//! Converts heap-allocated strings into cheap integer handles. +//! Comparing two symbols is O(1) integer comparison. + +use std::collections::HashMap; + +/// A lightweight handle to an interned string. +/// +/// Comparing two symbols is O(1). Symbols are ordered by insertion order, +/// not lexicographically—use `Interner::resolve` if you need string ordering. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct Symbol(u32); + +impl Symbol { + /// Raw index for serialization/debugging. + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } +} + +// Implement Ord based on raw index (insertion order). +// For deterministic output, sort by resolved string when needed. +impl PartialOrd for Symbol { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Symbol { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0) + } +} + +/// String interner. Deduplicates strings and returns cheap Symbol handles. +#[derive(Debug, Clone, Default)] +pub struct Interner { + /// Map from string to symbol for deduplication. + map: HashMap, + /// Storage for interned strings, indexed by Symbol. + strings: Vec, +} + +impl Interner { + pub fn new() -> Self { + Self::default() + } + + /// Intern a string, returning its Symbol. 
+ /// If the string was already interned, returns the existing Symbol. + pub fn intern(&mut self, s: &str) -> Symbol { + if let Some(&sym) = self.map.get(s) { + return sym; + } + + let sym = Symbol(self.strings.len() as u32); + self.strings.push(s.to_owned()); + self.map.insert(s.to_owned(), sym); + sym + } + + /// Intern an owned string, avoiding clone if not already present. + pub fn intern_owned(&mut self, s: String) -> Symbol { + if let Some(&sym) = self.map.get(&s) { + return sym; + } + + let sym = Symbol(self.strings.len() as u32); + self.strings.push(s.clone()); + self.map.insert(s, sym); + sym + } + + /// Resolve a Symbol back to its string. + /// + /// # Panics + /// Panics if the symbol was not created by this interner. + #[inline] + pub fn resolve(&self, sym: Symbol) -> &str { + &self.strings[sym.0 as usize] + } + + /// Try to resolve a Symbol, returning None if invalid. + #[inline] + pub fn try_resolve(&self, sym: Symbol) -> Option<&str> { + self.strings.get(sym.0 as usize).map(|s| s.as_str()) + } + + /// Number of interned strings. + #[inline] + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Whether the interner is empty. 
+ #[inline] + pub fn is_empty(&self) -> bool { + self.strings.is_empty() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_deduplicates() { + let mut interner = Interner::new(); + + let a = interner.intern("foo"); + let b = interner.intern("foo"); + let c = interner.intern("bar"); + + assert_eq!(a, b); + assert_ne!(a, c); + assert_eq!(interner.len(), 2); + } + + #[test] + fn resolve_roundtrip() { + let mut interner = Interner::new(); + + let sym = interner.intern("hello"); + assert_eq!(interner.resolve(sym), "hello"); + } + + #[test] + fn intern_owned_avoids_clone_on_hit() { + let mut interner = Interner::new(); + + let a = interner.intern("test"); + let b = interner.intern_owned("test".to_string()); + + assert_eq!(a, b); + assert_eq!(interner.len(), 1); + } + + #[test] + fn symbols_are_copy() { + let mut interner = Interner::new(); + let sym = interner.intern("x"); + + // Symbol is Copy, so this should work without move + let copy = sym; + assert_eq!(sym, copy); + } + + #[test] + fn symbol_ordering_is_insertion_order() { + let mut interner = Interner::new(); + + let z = interner.intern("z"); + let a = interner.intern("a"); + + // z was inserted first, so z < a by insertion order + assert!(z < a); + } +} diff --git a/crates/plotnik-lib/src/query/type_check/types.rs b/crates/plotnik-lib/src/query/type_check/types.rs index 783c79b4..5b4a7d8c 100644 --- a/crates/plotnik-lib/src/query/type_check/types.rs +++ b/crates/plotnik-lib/src/query/type_check/types.rs @@ -6,6 +6,8 @@ use std::collections::BTreeMap; +use super::symbol::Symbol; + /// Interned type identifier. Types are stored in TypeContext and referenced by ID. 
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] pub struct TypeId(pub u32); @@ -33,17 +35,17 @@ pub enum TypeKind { /// Extracted text from a node String, /// User-specified type via `@x :: TypeName` - Custom(String), - /// Object with named fields - Struct(BTreeMap), - /// Tagged union from labeled alternations - Enum(BTreeMap), + Custom(Symbol), + /// Object with named fields (keys are interned Symbols) + Struct(BTreeMap), + /// Tagged union from labeled alternations (keys are interned Symbols) + Enum(BTreeMap), /// Array type with element type Array { element: TypeId, non_empty: bool }, /// Optional wrapper Optional(TypeId), - /// Forward reference to a recursive type - Ref(String), + /// Forward reference to a recursive type (name as Symbol) + Ref(Symbol), } impl TypeKind { @@ -67,7 +69,7 @@ impl TypeKind { } /// Field information within a struct type. -#[derive(Clone, PartialEq, Eq, Hash, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct FieldInfo { pub type_id: TypeId, pub optional: bool, @@ -129,7 +131,7 @@ impl TermInfo { } } - pub fn fields(arity: Arity, fields: BTreeMap) -> Self { + pub fn fields(arity: Arity, fields: BTreeMap) -> Self { Self { arity, flow: TypeFlow::Fields(fields), @@ -170,8 +172,8 @@ pub enum TypeFlow { Void, /// Opaque single value that doesn't bubble Scalar(TypeId), - /// Transparent fields that bubble to parent scope - Fields(BTreeMap), + /// Transparent fields that bubble to parent scope (keys are interned Symbols) + Fields(BTreeMap), } impl TypeFlow { @@ -195,16 +197,16 @@ impl TypeFlow { } } - /// Get field names if this is a Fields flow - pub fn field_names(&self) -> Option> { + /// Get field symbols if this is a Fields flow + pub fn field_symbols(&self) -> Option> { match self { - TypeFlow::Fields(f) => Some(f.keys().map(|s| s.as_str()).collect()), + TypeFlow::Fields(f) => Some(f.keys().copied().collect()), _ => None, } } /// Create a single-field flow - pub fn single_field(name: String, info: FieldInfo) 
-> Self { + pub fn single_field(name: Symbol, info: FieldInfo) -> Self { let mut fields = BTreeMap::new(); fields.insert(name, info); TypeFlow::Fields(fields) diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs index 2ab6421a..cec0f838 100644 --- a/crates/plotnik-lib/src/query/type_check/unify.rs +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -5,6 +5,7 @@ use std::collections::BTreeMap; +use super::symbol::Symbol; use super::types::{FieldInfo, TYPE_NODE, TypeFlow, TypeId}; /// Error during type unification. @@ -13,20 +14,20 @@ pub enum UnifyError { /// Scalar type appeared in untagged alternation (needs tagging) ScalarInUntagged, /// Capture has incompatible types across branches - IncompatibleTypes { field: String }, + IncompatibleTypes { field: Symbol }, /// Capture has incompatible struct shapes across branches - IncompatibleStructs { field: String }, + IncompatibleStructs { field: Symbol }, /// Array element types don't match - IncompatibleArrayElements { field: String }, + IncompatibleArrayElements { field: Symbol }, } impl UnifyError { - pub fn field_name(&self) -> Option<&str> { + pub fn field_symbol(&self) -> Option { match self { UnifyError::ScalarInUntagged => None, UnifyError::IncompatibleTypes { field } | UnifyError::IncompatibleStructs { field } - | UnifyError::IncompatibleArrayElements { field } => Some(field), + | UnifyError::IncompatibleArrayElements { field } => Some(*field), } } } @@ -64,7 +65,7 @@ pub fn unify_flows(flows: impl IntoIterator) -> Result) -> BTreeMap { +fn make_all_optional(fields: BTreeMap) -> BTreeMap { fields .into_iter() .map(|(k, v)| (k, v.make_optional())) @@ -77,19 +78,19 @@ fn make_all_optional(fields: BTreeMap) -> BTreeMap, - b: BTreeMap, -) -> Result, UnifyError> { + a: BTreeMap, + b: BTreeMap, +) -> Result, UnifyError> { let mut result = BTreeMap::new(); // Process all keys from a for (key, a_info) in &a { if let Some(b_info) = b.get(key) { // Key 
exists in both: unify types - let unified_type = unify_type_ids(a_info.type_id, b_info.type_id, key)?; + let unified_type = unify_type_ids(a_info.type_id, b_info.type_id, *key)?; let optional = a_info.optional || b_info.optional; result.insert( - key.clone(), + *key, FieldInfo { type_id: unified_type, optional, @@ -97,7 +98,7 @@ fn merge_fields( ); } else { // Key only in a: make optional - result.insert(key.clone(), a_info.clone().make_optional()); + result.insert(*key, a_info.make_optional()); } } @@ -115,7 +116,7 @@ fn merge_fields( /// /// For now, types must match exactly (except Node is compatible with Node). /// Future: could allow structural subtyping for structs. -fn unify_type_ids(a: TypeId, b: TypeId, field: &str) -> Result { +fn unify_type_ids(a: TypeId, b: TypeId, field: Symbol) -> Result { if a == b { return Ok(a); } @@ -126,15 +127,19 @@ fn unify_type_ids(a: TypeId, b: TypeId, field: &str) -> Result Symbol { + // For tests, create symbols directly. In real code, use Interner. + // This is safe because tests don't need actual string resolution. 
+ unsafe { std::mem::transmute(n) } + } + #[test] fn unify_void_void() { let result = unify_flow(TypeFlow::Void, TypeFlow::Void); @@ -143,14 +148,15 @@ mod tests { #[test] fn unify_void_fields() { + let x = make_symbol(0); let mut fields = BTreeMap::new(); - fields.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + fields.insert(x, FieldInfo::required(TYPE_NODE)); let result = unify_flow(TypeFlow::Void, TypeFlow::Fields(fields)).unwrap(); match result { TypeFlow::Fields(f) => { - assert!(f.get("x").unwrap().optional); + assert!(f.get(&x).unwrap().optional); } _ => panic!("expected Fields"), } @@ -158,21 +164,24 @@ mod tests { #[test] fn unify_fields_merge() { + let x = make_symbol(0); + let y = make_symbol(1); + let mut a = BTreeMap::new(); - a.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); + a.insert(x, FieldInfo::required(TYPE_NODE)); let mut b = BTreeMap::new(); - b.insert("x".to_string(), FieldInfo::required(TYPE_NODE)); - b.insert("y".to_string(), FieldInfo::required(TYPE_NODE)); + b.insert(x, FieldInfo::required(TYPE_NODE)); + b.insert(y, FieldInfo::required(TYPE_NODE)); let result = unify_flow(TypeFlow::Fields(a), TypeFlow::Fields(b)).unwrap(); match result { TypeFlow::Fields(f) => { // x is in both, so required - assert!(!f.get("x").unwrap().optional); + assert!(!f.get(&x).unwrap().optional); // y only in b, so optional - assert!(f.get("y").unwrap().optional); + assert!(f.get(&y).unwrap().optional); } _ => panic!("expected Fields"), } diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs index 0fd791ef..84046525 100644 --- a/crates/plotnik-lib/src/query/type_check_tests.rs +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -38,11 +38,11 @@ fn capture_with_custom_type() { text: string; } + export type Identifier = Node; + export interface Q { name: Identifier; } - - export type Identifier = Node; "); } @@ -142,14 +142,14 @@ fn row_list_basic() { text: string; } - export 
interface Q { - rows: QRows[]; - } - export interface QRows { k: Node; v: Node; } + + export interface Q { + rows: QRows[]; + } "); } @@ -165,14 +165,14 @@ fn row_list_non_empty() { text: string; } - export interface Q { - rows: [QRows, ...QRows[]]; - } - export interface QRows { k: Node; v: Node; } + + export interface Q { + rows: [QRows, ...QRows[]]; + } "); } @@ -242,14 +242,14 @@ fn captured_sequence_creates_struct() { text: string; } - export interface Q { - row: QRow; - } - export interface QRow { a: Node; b: Node; } + + export interface Q { + row: QRow; + } "); } @@ -375,10 +375,6 @@ fn tagged_alt_captured() { text: string; } - export interface Q { - result: QResult; - } - export interface QResultNum { $tag: "Num"; $data: { n: Node }; @@ -390,6 +386,10 @@ fn tagged_alt_captured() { } export type QResult = QResultNum | QResultStr; + + export interface Q { + result: QResult; + } "#); } @@ -408,15 +408,15 @@ fn nested_captured_group() { text: string; } - export interface Q { - name: Node; - pair: QPair; - } - export interface QPair { k: Node; v: Node; } + + export interface Q { + name: Node; + pair: QPair; + } "); } From 8eb6df30551c6ea3b9a66f800f72e0765475b5b6 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 15:00:43 -0300 Subject: [PATCH 06/18] Fix --- crates/plotnik-lib/src/query/dependencies.rs | 47 +++++++- .../src/query/type_check/context.rs | 110 ++++++++++++++---- .../src/query/type_check/emit_ts.rs | 12 +- .../plotnik-lib/src/query/type_check/mod.rs | 2 +- .../src/query/type_check/symbol.rs | 48 +++++++- .../plotnik-lib/src/query/type_check/types.rs | 6 +- 6 files changed, 191 insertions(+), 34 deletions(-) diff --git a/crates/plotnik-lib/src/query/dependencies.rs b/crates/plotnik-lib/src/query/dependencies.rs index 9a24369e..2cfd9c32 100644 --- a/crates/plotnik-lib/src/query/dependencies.rs +++ b/crates/plotnik-lib/src/query/dependencies.rs @@ -8,6 +8,8 @@ //! which is useful for passes that need to process dependencies before //! 
dependents (like type inference). +use std::collections::HashMap; + use indexmap::{IndexMap, IndexSet}; use super::source_map::SourceId; @@ -17,6 +19,7 @@ use crate::Diagnostics; use crate::diagnostics::DiagnosticKind; use crate::parser::{AnonymousNode, Def, Expr, NamedNode, Ref, Root, SeqExpr}; use crate::query::symbol_table::SymbolTable; +use crate::query::type_check::DefId; use crate::query::visitor::{Visitor, walk_expr}; /// Result of dependency analysis. @@ -29,14 +32,54 @@ pub struct DependencyAnalysis { /// - Definitions within an SCC are mutually recursive. /// - Every definition in the symbol table appears exactly once. pub sccs: Vec>, + + /// Maps definition name to its DefId. + name_to_def: HashMap, + + /// Maps DefId to definition name (indexed by DefId). + def_names: Vec, +} + +impl DependencyAnalysis { + /// Get the DefId for a definition name. + pub fn def_id(&self, name: &str) -> Option { + self.name_to_def.get(name).copied() + } + + /// Get the name for a DefId. + pub fn def_name(&self, id: DefId) -> &str { + &self.def_names[id.index()] + } + + /// Number of definitions. + pub fn def_count(&self) -> usize { + self.def_names.len() + } } /// Analyze dependencies between definitions. /// -/// Returns the SCCs in reverse topological order. +/// Returns the SCCs in reverse topological order, with DefId mappings. pub fn analyze_dependencies(symbol_table: &SymbolTable) -> DependencyAnalysis { let sccs = SccFinder::find(symbol_table); - DependencyAnalysis { sccs } + + // Assign DefIds in SCC order (leaves first, so dependencies get lower IDs) + let mut name_to_def = HashMap::new(); + let mut def_names = Vec::new(); + + for scc in &sccs { + for name in scc { + let def_id = DefId::from_raw(def_names.len() as u32); + name_to_def.insert(name.clone(), def_id); + def_names.push(name.clone()); + } + } + + DependencyAnalysis { + sccs, + name_to_def, + def_names, + } } /// Validate recursion using the pre-computed dependency analysis. 
diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 1cf191e9..2664ab71 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -8,7 +8,7 @@ use std::collections::HashMap; use crate::parser::ast::Expr; -use super::symbol::{Interner, Symbol}; +use super::symbol::{DefId, Interner, Symbol}; use super::types::{Arity, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind}; /// Central registry for types, symbols, and expression metadata. @@ -22,8 +22,12 @@ pub struct TypeContext { type_map: HashMap, /// Cached term info per expression term_info: HashMap, - /// Definition-level type info (for TypeScript emission) - def_types: HashMap, + /// Definition-level type info (for TypeScript emission), keyed by DefId + def_types: HashMap, + /// DefId → Symbol mapping (for resolving def names) + def_names: Vec, + /// Symbol → DefId reverse lookup + def_ids: HashMap, } impl Default for TypeContext { @@ -40,6 +44,8 @@ impl TypeContext { type_map: HashMap::new(), term_info: HashMap::new(), def_types: HashMap::new(), + def_names: Vec::new(), + def_ids: HashMap::new(), }; // Pre-register builtin types at their expected IDs @@ -118,33 +124,65 @@ impl TypeContext { self.term_info.get(expr) } + // ========== Definition registry ========== + + /// Register a definition by name, returning its DefId. + /// If already registered, returns existing DefId. + pub fn register_def(&mut self, name: &str) -> DefId { + let sym = self.interner.intern(name); + if let Some(&def_id) = self.def_ids.get(&sym) { + return def_id; + } + let def_id = DefId::from_raw(self.def_names.len() as u32); + self.def_names.push(sym); + self.def_ids.insert(sym, def_id); + def_id + } + + /// Get DefId for a definition name. 
+ pub fn get_def_id(&self, name: &str) -> Option { + // Need to check if interned, avoid creating new symbol + for (&sym, &def_id) in &self.def_ids { + if self.interner.resolve(sym) == name { + return Some(def_id); + } + } + None + } + + /// Get the name Symbol for a DefId. + pub fn def_name_sym(&self, def_id: DefId) -> Symbol { + self.def_names[def_id.index()] + } + + /// Get the name string for a DefId. + pub fn def_name(&self, def_id: DefId) -> &str { + self.resolve(self.def_names[def_id.index()]) + } + // ========== Definition types ========== - /// Register the output type for a definition. - pub fn set_def_type(&mut self, name: Symbol, type_id: TypeId) { - self.def_types.insert(name, type_id); + /// Register the output type for a definition by DefId. + pub fn set_def_type(&mut self, def_id: DefId, type_id: TypeId) { + self.def_types.insert(def_id, type_id); } /// Register the output type for a definition by string name. + /// Registers the def if not already known. pub fn set_def_type_by_name(&mut self, name: &str, type_id: TypeId) { - let sym = self.interner.intern(name); - self.def_types.insert(sym, type_id); + let def_id = self.register_def(name); + self.def_types.insert(def_id, type_id); } - /// Get the output type for a definition. - pub fn get_def_type(&self, name: Symbol) -> Option { - self.def_types.get(&name).copied() + /// Get the output type for a definition by DefId. + pub fn get_def_type(&self, def_id: DefId) -> Option { + self.def_types.get(&def_id).copied() } /// Get the output type for a definition by string name. pub fn get_def_type_by_name(&self, name: &str) -> Option { - // Linear scan since we don't have reverse lookup without interning - for (&sym, &type_id) in &self.def_types { - if self.interner.resolve(sym) == name { - return Some(type_id); - } - } - None + self.get_def_id(name) + .and_then(|id| self.def_types.get(&id).copied()) } /// Get arity for an expression (for backward compatibility with expr_arity). 
@@ -167,9 +205,16 @@ impl TypeContext { self.types.len() } - /// Iterate over all definition types. - pub fn iter_def_types(&self) -> impl Iterator + '_ { - self.def_types.iter().map(|(&sym, &type_id)| (sym, type_id)) + /// Iterate over all definition types as (DefId, TypeId). + pub fn iter_def_types(&self) -> impl Iterator + '_ { + self.def_types + .iter() + .map(|(&def_id, &type_id)| (def_id, type_id)) + } + + /// Number of registered definitions. + pub fn def_count(&self) -> usize { + self.def_names.len() } } @@ -236,4 +281,27 @@ mod tests { assert_eq!(ctx.get_def_type_by_name("Query"), Some(TYPE_NODE)); assert_eq!(ctx.get_def_type_by_name("Missing"), None); } + + #[test] + fn register_def_returns_stable_id() { + let mut ctx = TypeContext::new(); + + let id1 = ctx.register_def("Foo"); + let id2 = ctx.register_def("Bar"); + let id3 = ctx.register_def("Foo"); // duplicate + + assert_eq!(id1, id3); + assert_ne!(id1, id2); + assert_eq!(ctx.def_name(id1), "Foo"); + assert_eq!(ctx.def_name(id2), "Bar"); + } + + #[test] + fn def_id_lookup() { + let mut ctx = TypeContext::new(); + + ctx.register_def("Query"); + assert!(ctx.get_def_id("Query").is_some()); + assert!(ctx.get_def_id("Missing").is_none()); + } } diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index 0652430d..a18bbc67 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -87,7 +87,7 @@ impl<'a> TsEmitter<'a> { let def_names: HashMap = self .ctx .iter_def_types() - .map(|(sym, id)| (id, self.ctx.resolve(sym).to_string())) + .map(|(def_id, type_id)| (type_id, self.ctx.def_name(def_id).to_string())) .collect(); // Compute topological order (leaves first) @@ -277,8 +277,8 @@ impl<'a> TsEmitter<'a> { fn collect_type_names_with_context(&mut self) { // Reserve definition names first - for (sym, _) in self.ctx.iter_def_types() { - let name = self.ctx.resolve(sym); + for (def_id, 
_) in self.ctx.iter_def_types() { + let name = self.ctx.def_name(def_id); let pascal_name = to_pascal_case(name); self.used_names.insert(pascal_name); } @@ -286,8 +286,8 @@ impl<'a> TsEmitter<'a> { // Collect naming contexts by traversing definition types let mut type_contexts: HashMap = HashMap::new(); - for (sym, type_id) in self.ctx.iter_def_types() { - let def_name = self.ctx.resolve(sym); + for (def_id, type_id) in self.ctx.iter_def_types() { + let def_name = self.ctx.def_name(def_id); self.collect_contexts_for_type( type_id, &NamingContext { @@ -600,7 +600,7 @@ impl<'a> TsEmitter<'a> { TypeKind::Node => "Node".to_string(), TypeKind::String => "string".to_string(), TypeKind::Custom(sym) => self.ctx.resolve(*sym).to_string(), - TypeKind::Ref(sym) => to_pascal_case(self.ctx.resolve(*sym)), + TypeKind::Ref(def_id) => to_pascal_case(self.ctx.def_name(*def_id)), TypeKind::Struct(fields) => { if let Some(name) = self.type_names.get(&type_id) { diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index 72cde7ca..545dac1a 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -14,7 +14,7 @@ mod unify; pub use context::TypeContext; pub use emit_ts::{EmitConfig, TsEmitter, emit_typescript, emit_typescript_with_config}; -pub use symbol::{Interner, Symbol}; +pub use symbol::{DefId, Interner, Symbol}; pub use types::{ Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeFlow, TypeId, TypeKind, diff --git a/crates/plotnik-lib/src/query/type_check/symbol.rs b/crates/plotnik-lib/src/query/type_check/symbol.rs index bb2116ef..fec5211c 100644 --- a/crates/plotnik-lib/src/query/type_check/symbol.rs +++ b/crates/plotnik-lib/src/query/type_check/symbol.rs @@ -1,7 +1,9 @@ -//! Symbol interning for field and type names. +//! Symbol interning for field and type names, plus definition identifiers. //! //! 
Converts heap-allocated strings into cheap integer handles. //! Comparing two symbols is O(1) integer comparison. +//! +//! `DefId` identifies named definitions (like `Foo = ...`) by stable index. use std::collections::HashMap; @@ -34,6 +36,33 @@ impl Ord for Symbol { } } +/// A lightweight handle to a named definition. +/// +/// Assigned during dependency analysis. Enables O(1) lookup of definition +/// properties without string comparison. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct DefId(u32); + +impl DefId { + /// Create a DefId from a raw index. Use only for deserialization. + #[inline] + pub fn from_raw(index: u32) -> Self { + Self(index) + } + + /// Raw index for serialization/debugging. + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } + + /// Index for array access. + #[inline] + pub fn index(self) -> usize { + self.0 as usize + } +} + /// String interner. Deduplicates strings and returns cheap Symbol handles. #[derive(Debug, Clone, Default)] pub struct Interner { @@ -157,4 +186,21 @@ mod tests { // z was inserted first, so z < a by insertion order assert!(z < a); } + + #[test] + fn def_id_roundtrip() { + let id = DefId::from_raw(42); + assert_eq!(id.as_u32(), 42); + assert_eq!(id.index(), 42); + } + + #[test] + fn def_id_equality() { + let a = DefId::from_raw(1); + let b = DefId::from_raw(1); + let c = DefId::from_raw(2); + + assert_eq!(a, b); + assert_ne!(a, c); + } } diff --git a/crates/plotnik-lib/src/query/type_check/types.rs b/crates/plotnik-lib/src/query/type_check/types.rs index 5b4a7d8c..3c067a6f 100644 --- a/crates/plotnik-lib/src/query/type_check/types.rs +++ b/crates/plotnik-lib/src/query/type_check/types.rs @@ -6,7 +6,7 @@ use std::collections::BTreeMap; -use super::symbol::Symbol; +use super::symbol::{DefId, Symbol}; /// Interned type identifier. Types are stored in TypeContext and referenced by ID. 
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] @@ -44,8 +44,8 @@ pub enum TypeKind { Array { element: TypeId, non_empty: bool }, /// Optional wrapper Optional(TypeId), - /// Forward reference to a recursive type (name as Symbol) - Ref(Symbol), + /// Forward reference to a recursive type (resolved DefId) + Ref(DefId), } impl TypeKind { From 2607b980356f75326d94d7baf50ff14468b688d4 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 15:02:02 -0300 Subject: [PATCH 07/18] Fix clippy --- .../src/query/type_check/emit_ts.rs | 10 ++---- .../plotnik-lib/src/query/type_check/infer.rs | 32 ++++++++++--------- .../plotnik-lib/src/query/type_check/mod.rs | 20 ++++++------ 3 files changed, 30 insertions(+), 32 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index a18bbc67..f2028014 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -328,9 +328,7 @@ impl<'a> TsEmitter<'a> { match kind { TypeKind::Struct(fields) => { // Only set context if this type needs a name - if !contexts.contains_key(&type_id) { - contexts.insert(type_id, ctx.clone()); - } + contexts.entry(type_id).or_insert_with(|| ctx.clone()); // Recurse into fields for (&field_sym, info) in fields { let field_name = self.ctx.resolve(field_sym); @@ -342,9 +340,7 @@ impl<'a> TsEmitter<'a> { } } TypeKind::Enum(variants) => { - if !contexts.contains_key(&type_id) { - contexts.insert(type_id, ctx.clone()); - } + contexts.entry(type_id).or_insert_with(|| ctx.clone()); // Don't recurse into variant types - they're inlined as $data let _ = variants; } @@ -645,7 +641,7 @@ impl<'a> TsEmitter<'a> { let field_strs: Vec<_> = sorted_fields .iter() - .map(|&(&sym, ref info)| { + .map(|&(&sym, info)| { let name = self.ctx.resolve(sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" 
} else { "" }; diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index c8355cd8..92298620 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -85,9 +85,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { if let TypeFlow::Fields(fields) = child_info.flow { for (name, info) in fields { - if !merged_fields.contains_key(&name) { - merged_fields.insert(name, info); - } + merged_fields.entry(name).or_insert(info); } } } @@ -144,18 +142,22 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { if let TypeFlow::Fields(fields) = child_info.flow { for (name, info) in fields { - if merged_fields.contains_key(&name) { - // Duplicate capture in same scope - error - self.diag - .report( - self.source_id, - DiagnosticKind::DuplicateCaptureInScope, - child.text_range(), - ) - .message(self.ctx.resolve(name)) - .emit(); - } else { - merged_fields.insert(name, info); + use std::collections::btree_map::Entry; + match merged_fields.entry(name) { + Entry::Vacant(e) => { + e.insert(info); + } + Entry::Occupied(_) => { + // Duplicate capture in same scope - error + self.diag + .report( + self.source_id, + DiagnosticKind::DuplicateCaptureInScope, + child.text_range(), + ) + .message(self.ctx.resolve(name)) + .emit(); + } } } } diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index 545dac1a..dcc8d278 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -61,11 +61,11 @@ pub fn infer_types( infer_root(&mut ctx, symbol_table, diag, source_id, root); // Register the definition's output type - if let Some(body) = symbol_table.get(def_name) { - if let Some(info) = ctx.get_term_info(body).cloned() { - let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type_by_name(def_name, type_id); - } + if let Some(body) = symbol_table.get(def_name) + && let 
Some(info) = ctx.get_term_info(body).cloned() + { + let type_id = flow_to_type_id(&mut ctx, &info.flow); + ctx.set_def_type_by_name(def_name, type_id); } } } @@ -82,11 +82,11 @@ pub fn infer_types( infer_root(&mut ctx, symbol_table, diag, source_id, root); - if let Some(body) = symbol_table.get(name) { - if let Some(info) = ctx.get_term_info(body).cloned() { - let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type_by_name(name, type_id); - } + if let Some(body) = symbol_table.get(name) + && let Some(info) = ctx.get_term_info(body).cloned() + { + let type_id = flow_to_type_id(&mut ctx, &info.flow); + ctx.set_def_type_by_name(name, type_id); } } From d9d544f707eeb077863c0b58e7ddae982a6a26e2 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 15:10:33 -0300 Subject: [PATCH 08/18] Fixes --- .../src/query/type_check/context.rs | 26 ++++- .../plotnik-lib/src/query/type_check/infer.rs | 55 ++++++----- .../plotnik-lib/src/query/type_check/mod.rs | 5 +- .../plotnik-lib/src/query/type_check/types.rs | 40 +++----- .../plotnik-lib/src/query/type_check/unify.rs | 94 +++++++++++-------- 5 files changed, 126 insertions(+), 94 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 2664ab71..beb7d098 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -4,12 +4,14 @@ //! Symbols are interned to enable cheap string comparison. //! TermInfo is cached per-expression to avoid recomputation. -use std::collections::HashMap; +use std::collections::{BTreeMap, HashMap}; use crate::parser::ast::Expr; use super::symbol::{DefId, Interner, Symbol}; -use super::types::{Arity, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind}; +use super::types::{ + Arity, FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind, +}; /// Central registry for types, symbols, and expression metadata. 
#[derive(Debug, Clone)] @@ -112,6 +114,26 @@ impl TypeContext { (id, &self.types[id.0 as usize]) } + /// Intern a struct type from fields. + pub fn intern_struct(&mut self, fields: BTreeMap) -> TypeId { + self.intern_type(TypeKind::Struct(fields)) + } + + /// Intern a struct type with a single field. + pub fn intern_single_field(&mut self, name: Symbol, info: FieldInfo) -> TypeId { + let mut fields = BTreeMap::new(); + fields.insert(name, info); + self.intern_type(TypeKind::Struct(fields)) + } + + /// Get struct fields from a TypeId, if it points to a Struct. + pub fn get_struct_fields(&self, id: TypeId) -> Option<&BTreeMap> { + match self.get_type(id)? { + TypeKind::Struct(fields) => Some(fields), + _ => None, + } + } + // ========== Term info cache ========== /// Cache term info for an expression. diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 92298620..8d2ecfa8 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -83,9 +83,11 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { for child in node.children() { let child_info = self.infer_expr(&child); - if let TypeFlow::Fields(fields) = child_info.flow { + if let TypeFlow::Bubble(type_id) = child_info.flow + && let Some(fields) = self.ctx.get_struct_fields(type_id) + { for (name, info) in fields { - merged_fields.entry(name).or_insert(info); + merged_fields.entry(*name).or_insert(*info); } } } @@ -93,7 +95,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let flow = if merged_fields.is_empty() { TypeFlow::Void } else { - TypeFlow::Fields(merged_fields) + TypeFlow::Bubble(self.ctx.intern_struct(merged_fields)) }; TermInfo::new(Arity::One, flow) @@ -140,8 +142,10 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { for child in &children { let child_info = self.infer_expr(child); - if let TypeFlow::Fields(fields) = child_info.flow { - for (name, info) in fields { + if let TypeFlow::Bubble(type_id) = 
child_info.flow + && let Some(fields) = self.ctx.get_struct_fields(type_id) + { + for (&name, &info) in fields { use std::collections::btree_map::Entry; match merged_fields.entry(name) { Entry::Vacant(e) => { @@ -168,7 +172,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let flow = if merged_fields.is_empty() { TypeFlow::Void } else { - TypeFlow::Fields(merged_fields) + TypeFlow::Bubble(self.ctx.intern_struct(merged_fields)) }; TermInfo::new(arity, flow) @@ -236,7 +240,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } // Unify all flows - let unified_flow = match unify_flows(flows) { + let unified_flow = match unify_flows(self.ctx, flows) { Ok(flow) => flow, Err(err) => { self.report_unify_error(alt.text_range(), &err); @@ -276,7 +280,10 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let type_id = annotation_type.unwrap_or(TYPE_NODE); return TermInfo::new( Arity::One, - TypeFlow::single_field(capture_name, FieldInfo::required(type_id)), + TypeFlow::Bubble( + self.ctx + .intern_single_field(capture_name, FieldInfo::required(type_id)), + ), ); }; @@ -305,13 +312,9 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { // @name on Scalar → capture that scalar type annotation_type.unwrap_or(*type_id) } - TypeFlow::Fields(fields) => { - // @name on Fields → create Struct from fields, capture that - if let Some(annotated) = annotation_type { - annotated - } else { - self.ctx.intern_type(TypeKind::Struct(fields.clone())) - } + TypeFlow::Bubble(type_id) => { + // @name on Bubble → capture the struct type directly + annotation_type.unwrap_or(*type_id) } }; @@ -323,7 +326,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TermInfo::new( inner_info.arity, - TypeFlow::single_field(capture_name, field_info), + TypeFlow::Bubble(self.ctx.intern_single_field(capture_name, field_info)), ) } @@ -357,13 +360,18 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TypeFlow::Scalar(t) => { TypeFlow::Scalar(self.ctx.intern_type(TypeKind::Optional(t))) } - TypeFlow::Fields(fields) => { + TypeFlow::Bubble(type_id) => { // 
Make all fields optional + let fields = self + .ctx + .get_struct_fields(type_id) + .cloned() + .unwrap_or_default(); let optional_fields = fields .into_iter() .map(|(k, v)| (k, v.make_optional())) .collect(); - TypeFlow::Fields(optional_fields) + TypeFlow::Bubble(self.ctx.intern_struct(optional_fields)) } }; TermInfo::new(inner_info.arity, flow) @@ -392,11 +400,10 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { }); TypeFlow::Scalar(array_type) } - TypeFlow::Fields(fields) => { + TypeFlow::Bubble(struct_type) => { // Fields with * or + and no row capture is an error // (already reported by check_strict_dimensionality if !is_row_capture) // Return array of struct as best-effort - let struct_type = self.ctx.intern_type(TypeKind::Struct(fields)); let array_type = self.ctx.intern_type(TypeKind::Array { element: struct_type, non_empty: quantifier.is_non_empty(), @@ -449,7 +456,8 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { /// Check strict dimensionality rule for * and + quantifiers. fn check_strict_dimensionality(&mut self, quant: &QuantifiedExpr, inner_info: &TermInfo) { // If inner has fields (captures), that's a violation - if let TypeFlow::Fields(fields) = &inner_info.flow + if let TypeFlow::Bubble(type_id) = &inner_info.flow + && let Some(fields) = self.ctx.get_struct_fields(*type_id) && !fields.is_empty() { let op = quant @@ -494,9 +502,8 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { /// Convert a TypeFlow to a TypeId for storage in enum variants, etc. 
fn flow_to_type(&mut self, flow: &TypeFlow) -> TypeId { match flow { - TypeFlow::Void => self.ctx.intern_type(TypeKind::Struct(BTreeMap::new())), - TypeFlow::Scalar(t) => *t, - TypeFlow::Fields(fields) => self.ctx.intern_type(TypeKind::Struct(fields.clone())), + TypeFlow::Void => self.ctx.intern_struct(BTreeMap::new()), + TypeFlow::Scalar(t) | TypeFlow::Bubble(t) => *t, } } diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index dcc8d278..f6a4cbc3 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -96,9 +96,8 @@ pub fn infer_types( /// Convert a TypeFlow to a TypeId for storage. fn flow_to_type_id(ctx: &mut TypeContext, flow: &TypeFlow) -> TypeId { match flow { - TypeFlow::Void => ctx.intern_type(TypeKind::Struct(BTreeMap::new())), - TypeFlow::Scalar(type_id) => *type_id, - TypeFlow::Fields(fields) => ctx.intern_type(TypeKind::Struct(fields.clone())), + TypeFlow::Void => ctx.intern_struct(BTreeMap::new()), + TypeFlow::Scalar(type_id) | TypeFlow::Bubble(type_id) => *type_id, } } diff --git a/crates/plotnik-lib/src/query/type_check/types.rs b/crates/plotnik-lib/src/query/type_check/types.rs index 3c067a6f..a553c010 100644 --- a/crates/plotnik-lib/src/query/type_check/types.rs +++ b/crates/plotnik-lib/src/query/type_check/types.rs @@ -131,10 +131,10 @@ impl TermInfo { } } - pub fn fields(arity: Arity, fields: BTreeMap) -> Self { + pub fn bubble(arity: Arity, struct_type_id: TypeId) -> Self { Self { arity, - flow: TypeFlow::Fields(fields), + flow: TypeFlow::Bubble(struct_type_id), } } } @@ -164,16 +164,17 @@ impl Arity { /// /// Determines what data an expression contributes to output: /// - Void: Transparent, produces nothing (used for structural matching) -/// - Scalar: Opaque single value (captures, refs create scope boundaries) -/// - Fields: Transparent field contributions that bubble to parent +/// - Scalar: Opaque single value that doesn't 
bubble (scope boundary) +/// - Bubble: Struct type whose fields bubble to parent scope #[derive(Clone, Debug)] pub enum TypeFlow { /// Transparent, produces nothing Void, /// Opaque single value that doesn't bubble Scalar(TypeId), - /// Transparent fields that bubble to parent scope (keys are interned Symbols) - Fields(BTreeMap), + /// Struct type with fields that bubble to parent scope. + /// The TypeId must point to a TypeKind::Struct. + Bubble(TypeId), } impl TypeFlow { @@ -185,32 +186,17 @@ impl TypeFlow { matches!(self, TypeFlow::Scalar(_)) } - pub fn is_fields(&self) -> bool { - matches!(self, TypeFlow::Fields(_)) + pub fn is_bubble(&self) -> bool { + matches!(self, TypeFlow::Bubble(_)) } - pub fn has_captures(&self) -> bool { + /// Get the TypeId if this is Scalar or Bubble + pub fn type_id(&self) -> Option { match self { - TypeFlow::Void => false, - TypeFlow::Scalar(_) => false, - TypeFlow::Fields(f) => !f.is_empty(), + TypeFlow::Void => None, + TypeFlow::Scalar(id) | TypeFlow::Bubble(id) => Some(*id), } } - - /// Get field symbols if this is a Fields flow - pub fn field_symbols(&self) -> Option> { - match self { - TypeFlow::Fields(f) => Some(f.keys().copied().collect()), - _ => None, - } - } - - /// Create a single-field flow - pub fn single_field(name: Symbol, info: FieldInfo) -> Self { - let mut fields = BTreeMap::new(); - fields.insert(name, info); - TypeFlow::Fields(fields) - } } /// Quantifier kind for type inference diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs index cec0f838..f423b1fc 100644 --- a/crates/plotnik-lib/src/query/type_check/unify.rs +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -5,8 +5,9 @@ use std::collections::BTreeMap; +use super::context::TypeContext; use super::symbol::Symbol; -use super::types::{FieldInfo, TYPE_NODE, TypeFlow, TypeId}; +use super::types::{FieldInfo, TYPE_NODE, TYPE_VOID, TypeFlow, TypeId}; /// Error during type unification. 
#[derive(Clone, Debug)] @@ -36,18 +37,25 @@ impl UnifyError { /// /// Rules: /// - Void ∪ Void → Void -/// - Void ∪ Fields(f) → Fields(make_all_optional(f)) -/// - Fields(a) ∪ Fields(b) → Fields(merge_fields(a, b)) +/// - Void ∪ Bubble(s) → Bubble(make_all_optional(s)) +/// - Bubble(a) ∪ Bubble(b) → Bubble(merge_fields(a, b)) /// - Scalar in untagged → Error (use tagged alternation instead) -pub fn unify_flow(a: TypeFlow, b: TypeFlow) -> Result { +pub fn unify_flow(ctx: &mut TypeContext, a: TypeFlow, b: TypeFlow) -> Result { match (a, b) { (TypeFlow::Void, TypeFlow::Void) => Ok(TypeFlow::Void), - (TypeFlow::Void, TypeFlow::Fields(f)) | (TypeFlow::Fields(f), TypeFlow::Void) => { - Ok(TypeFlow::Fields(make_all_optional(f))) + (TypeFlow::Void, TypeFlow::Bubble(id)) | (TypeFlow::Bubble(id), TypeFlow::Void) => { + let fields = ctx.get_struct_fields(id).cloned().unwrap_or_default(); + let optional_fields = make_all_optional(fields); + Ok(TypeFlow::Bubble(ctx.intern_struct(optional_fields))) } - (TypeFlow::Fields(a), TypeFlow::Fields(b)) => Ok(TypeFlow::Fields(merge_fields(a, b)?)), + (TypeFlow::Bubble(a_id), TypeFlow::Bubble(b_id)) => { + let a_fields = ctx.get_struct_fields(a_id).cloned().unwrap_or_default(); + let b_fields = ctx.get_struct_fields(b_id).cloned().unwrap_or_default(); + let merged = merge_fields(a_fields, b_fields)?; + Ok(TypeFlow::Bubble(ctx.intern_struct(merged))) + } // Scalars can't appear in untagged alternations (TypeFlow::Scalar(_), _) | (_, TypeFlow::Scalar(_)) => Err(UnifyError::ScalarInUntagged), @@ -55,13 +63,16 @@ pub fn unify_flow(a: TypeFlow, b: TypeFlow) -> Result { } /// Unify multiple flows from alternation branches. 
-pub fn unify_flows(flows: impl IntoIterator) -> Result { +pub fn unify_flows( + ctx: &mut TypeContext, + flows: impl IntoIterator, +) -> Result { let mut iter = flows.into_iter(); let Some(first) = iter.next() else { return Ok(TypeFlow::Void); }; - iter.try_fold(first, unify_flow) + iter.try_fold(first, |acc, flow| unify_flow(ctx, acc, flow)) } /// Make all fields in a map optional. @@ -126,6 +137,14 @@ fn unify_type_ids(a: TypeId, b: TypeId, field: Symbol) -> Result Result Symbol { - // For tests, create symbols directly. In real code, use Interner. - // This is safe because tests don't need actual string resolution. - unsafe { std::mem::transmute(n) } - } - #[test] fn unify_void_void() { - let result = unify_flow(TypeFlow::Void, TypeFlow::Void); + let mut ctx = TypeContext::new(); + let result = unify_flow(&mut ctx, TypeFlow::Void, TypeFlow::Void); assert!(matches!(result, Ok(TypeFlow::Void))); } #[test] - fn unify_void_fields() { - let x = make_symbol(0); - let mut fields = BTreeMap::new(); - fields.insert(x, FieldInfo::required(TYPE_NODE)); + fn unify_void_bubble() { + let mut ctx = TypeContext::new(); + let x = ctx.intern("x"); + let struct_id = ctx.intern_single_field(x, FieldInfo::required(TYPE_NODE)); - let result = unify_flow(TypeFlow::Void, TypeFlow::Fields(fields)).unwrap(); + let result = unify_flow(&mut ctx, TypeFlow::Void, TypeFlow::Bubble(struct_id)).unwrap(); match result { - TypeFlow::Fields(f) => { - assert!(f.get(&x).unwrap().optional); + TypeFlow::Bubble(id) => { + let fields = ctx.get_struct_fields(id).unwrap(); + assert!(fields.get(&x).unwrap().optional); } - _ => panic!("expected Fields"), + _ => panic!("expected Bubble"), } } #[test] - fn unify_fields_merge() { - let x = make_symbol(0); - let y = make_symbol(1); + fn unify_bubble_merge() { + let mut ctx = TypeContext::new(); + let x = ctx.intern("x"); + let y = ctx.intern("y"); - let mut a = BTreeMap::new(); - a.insert(x, FieldInfo::required(TYPE_NODE)); + let a_id = 
ctx.intern_single_field(x, FieldInfo::required(TYPE_NODE)); - let mut b = BTreeMap::new(); - b.insert(x, FieldInfo::required(TYPE_NODE)); - b.insert(y, FieldInfo::required(TYPE_NODE)); + let mut b_fields = BTreeMap::new(); + b_fields.insert(x, FieldInfo::required(TYPE_NODE)); + b_fields.insert(y, FieldInfo::required(TYPE_NODE)); + let b_id = ctx.intern_struct(b_fields); - let result = unify_flow(TypeFlow::Fields(a), TypeFlow::Fields(b)).unwrap(); + let result = unify_flow(&mut ctx, TypeFlow::Bubble(a_id), TypeFlow::Bubble(b_id)).unwrap(); match result { - TypeFlow::Fields(f) => { + TypeFlow::Bubble(id) => { + let fields = ctx.get_struct_fields(id).unwrap(); // x is in both, so required - assert!(!f.get(&x).unwrap().optional); + assert!(!fields.get(&x).unwrap().optional); // y only in b, so optional - assert!(f.get(&y).unwrap().optional); + assert!(fields.get(&y).unwrap().optional); } - _ => panic!("expected Fields"), + _ => panic!("expected Bubble"), } } #[test] fn unify_scalar_error() { - let result = unify_flow(TypeFlow::Scalar(TYPE_NODE), TypeFlow::Void); + let mut ctx = TypeContext::new(); + let result = unify_flow(&mut ctx, TypeFlow::Scalar(TYPE_NODE), TypeFlow::Void); assert!(matches!(result, Err(UnifyError::ScalarInUntagged))); } } From b694262cde8734f12915e82c61a1f545cafcc308 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 15:32:55 -0300 Subject: [PATCH 09/18] Refactor --- crates/plotnik-core/src/interner.rs | 234 ++++++++++++++++++ crates/plotnik-core/src/lib.rs | 3 + crates/plotnik-lib/src/query/dependencies.rs | 58 ++++- crates/plotnik-lib/src/query/dump.rs | 2 +- crates/plotnik-lib/src/query/query.rs | 22 +- .../src/query/type_check/context.rs | 121 ++++----- .../src/query/type_check/emit_ts.rs | 65 +++-- .../plotnik-lib/src/query/type_check/infer.rs | 24 +- .../plotnik-lib/src/query/type_check/mod.rs | 17 +- .../src/query/type_check/symbol.rs | 157 +----------- .../plotnik-lib/src/query/type_check/unify.rs | 9 +- 11 files changed, 
440 insertions(+), 272 deletions(-) create mode 100644 crates/plotnik-core/src/interner.rs diff --git a/crates/plotnik-core/src/interner.rs b/crates/plotnik-core/src/interner.rs new file mode 100644 index 00000000..36ad50cd --- /dev/null +++ b/crates/plotnik-core/src/interner.rs @@ -0,0 +1,234 @@ +//! String interning for efficient string deduplication and comparison. +//! +//! Converts heap-allocated strings into cheap integer handles (`Symbol`). +//! Comparing two symbols is O(1) integer comparison. +//! +//! The interner can be serialized to a binary blob format for the compiled query. + +use std::collections::HashMap; + +/// A lightweight handle to an interned string. +/// +/// Comparing two symbols is O(1). Symbols are ordered by insertion order, +/// not lexicographically—use `Interner::resolve` if you need string ordering. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct Symbol(u32); + +impl Symbol { + /// Raw index for serialization/debugging. + #[inline] + pub fn as_u32(self) -> u32 { + self.0 + } + + /// Create a Symbol from a raw index. Use only for deserialization. + #[inline] + pub fn from_raw(index: u32) -> Self { + Self(index) + } +} + +impl PartialOrd for Symbol { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Symbol { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.0.cmp(&other.0) + } +} + +/// String interner. Deduplicates strings and returns cheap Symbol handles. +#[derive(Debug, Clone, Default)] +pub struct Interner { + /// Map from string to symbol for deduplication. + map: HashMap, + /// Storage for interned strings, indexed by Symbol. + strings: Vec, +} + +impl Interner { + pub fn new() -> Self { + Self::default() + } + + /// Intern a string, returning its Symbol. + /// If the string was already interned, returns the existing Symbol. 
+ pub fn intern(&mut self, s: &str) -> Symbol { + if let Some(&sym) = self.map.get(s) { + return sym; + } + + let sym = Symbol(self.strings.len() as u32); + self.strings.push(s.to_owned()); + self.map.insert(s.to_owned(), sym); + sym + } + + /// Intern an owned string, avoiding clone if not already present. + pub fn intern_owned(&mut self, s: String) -> Symbol { + if let Some(&sym) = self.map.get(&s) { + return sym; + } + + let sym = Symbol(self.strings.len() as u32); + self.strings.push(s.clone()); + self.map.insert(s, sym); + sym + } + + /// Resolve a Symbol back to its string. + /// + /// # Panics + /// Panics if the symbol was not created by this interner. + #[inline] + pub fn resolve(&self, sym: Symbol) -> &str { + &self.strings[sym.0 as usize] + } + + /// Try to resolve a Symbol, returning None if invalid. + #[inline] + pub fn try_resolve(&self, sym: Symbol) -> Option<&str> { + self.strings.get(sym.0 as usize).map(|s| s.as_str()) + } + + /// Number of interned strings. + #[inline] + pub fn len(&self) -> usize { + self.strings.len() + } + + /// Whether the interner is empty. + #[inline] + pub fn is_empty(&self) -> bool { + self.strings.is_empty() + } + + /// Iterate over all interned strings with their symbols. + #[inline] + pub fn iter(&self) -> impl Iterator { + self.strings + .iter() + .enumerate() + .map(|(i, s)| (Symbol(i as u32), s.as_str())) + } + + /// Emit as binary format blob and offset table. + /// + /// Returns (concatenated UTF-8 bytes, offset for each string + sentinel). + /// The offsets array has `len() + 1` entries; the last is the total blob size. 
+ pub fn to_blob(&self) -> (Vec, Vec) { + let mut blob = Vec::new(); + let mut offsets = Vec::with_capacity(self.strings.len() + 1); + + for s in &self.strings { + offsets.push(blob.len() as u32); + blob.extend_from_slice(s.as_bytes()); + } + offsets.push(blob.len() as u32); // sentinel for length calculation + + (blob, offsets) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn intern_deduplicates() { + let mut interner = Interner::new(); + + let a = interner.intern("foo"); + let b = interner.intern("foo"); + let c = interner.intern("bar"); + + assert_eq!(a, b); + assert_ne!(a, c); + assert_eq!(interner.len(), 2); + } + + #[test] + fn resolve_roundtrip() { + let mut interner = Interner::new(); + + let sym = interner.intern("hello"); + assert_eq!(interner.resolve(sym), "hello"); + } + + #[test] + fn intern_owned_avoids_clone_on_hit() { + let mut interner = Interner::new(); + + let a = interner.intern("test"); + let b = interner.intern_owned("test".to_string()); + + assert_eq!(a, b); + assert_eq!(interner.len(), 1); + } + + #[test] + fn symbols_are_copy() { + let mut interner = Interner::new(); + let sym = interner.intern("x"); + + let copy = sym; + assert_eq!(sym, copy); + } + + #[test] + fn symbol_ordering_is_insertion_order() { + let mut interner = Interner::new(); + + let z = interner.intern("z"); + let a = interner.intern("a"); + + // z was inserted first, so z < a by insertion order + assert!(z < a); + } + + #[test] + fn to_blob_produces_correct_format() { + let mut interner = Interner::new(); + interner.intern("id"); + interner.intern("foo"); + + let (blob, offsets) = interner.to_blob(); + + assert_eq!(blob, b"idfoo"); + assert_eq!(offsets, vec![0, 2, 5]); + + // Verify we can reconstruct strings + let s0 = &blob[offsets[0] as usize..offsets[1] as usize]; + let s1 = &blob[offsets[1] as usize..offsets[2] as usize]; + assert_eq!(s0, b"id"); + assert_eq!(s1, b"foo"); + } + + #[test] + fn to_blob_empty() { + let interner = Interner::new(); + let 
(blob, offsets) = interner.to_blob(); + + assert!(blob.is_empty()); + assert_eq!(offsets, vec![0]); // just the sentinel + } + + #[test] + fn iter_yields_all_strings() { + let mut interner = Interner::new(); + let a = interner.intern("alpha"); + let b = interner.intern("beta"); + + let items: Vec<_> = interner.iter().collect(); + assert_eq!(items, vec![(a, "alpha"), (b, "beta")]); + } + + #[test] + fn symbol_from_raw_roundtrip() { + let sym = Symbol::from_raw(42); + assert_eq!(sym.as_u32(), 42); + } +} diff --git a/crates/plotnik-core/src/lib.rs b/crates/plotnik-core/src/lib.rs index e3ab0a97..06bd615c 100644 --- a/crates/plotnik-core/src/lib.rs +++ b/crates/plotnik-core/src/lib.rs @@ -13,8 +13,11 @@ use std::collections::HashMap; use std::num::NonZeroU16; +mod interner; mod invariants; +pub use interner::{Interner, Symbol}; + /// Raw node definition from `node-types.json`. #[derive(Debug, Clone, serde::Deserialize)] pub struct RawNode { diff --git a/crates/plotnik-lib/src/query/dependencies.rs b/crates/plotnik-lib/src/query/dependencies.rs index 2cfd9c32..c00d8fc8 100644 --- a/crates/plotnik-lib/src/query/dependencies.rs +++ b/crates/plotnik-lib/src/query/dependencies.rs @@ -11,6 +11,7 @@ use std::collections::HashMap; use indexmap::{IndexMap, IndexSet}; +use plotnik_core::{Interner, Symbol}; use super::source_map::SourceId; use rowan::TextRange; @@ -33,34 +34,64 @@ pub struct DependencyAnalysis { /// - Every definition in the symbol table appears exactly once. pub sccs: Vec>, - /// Maps definition name to its DefId. - name_to_def: HashMap, + /// Maps definition name (Symbol) to its DefId. + name_to_def: HashMap, - /// Maps DefId to definition name (indexed by DefId). - def_names: Vec, + /// Maps DefId to definition name Symbol (indexed by DefId). + def_names: Vec, } impl DependencyAnalysis { - /// Get the DefId for a definition name. 
- pub fn def_id(&self, name: &str) -> Option { - self.name_to_def.get(name).copied() + /// Get the DefId for a definition by Symbol. + pub fn def_id_by_symbol(&self, sym: Symbol) -> Option { + self.name_to_def.get(&sym).copied() } - /// Get the name for a DefId. - pub fn def_name(&self, id: DefId) -> &str { - &self.def_names[id.index()] + /// Get the DefId for a definition name (requires interner for lookup). + pub fn def_id(&self, interner: &Interner, name: &str) -> Option { + // Linear scan - only used during analysis, not hot path + for (&sym, &def_id) in &self.name_to_def { + if interner.resolve(sym) == name { + return Some(def_id); + } + } + None + } + + /// Get the name Symbol for a DefId. + pub fn def_name_sym(&self, id: DefId) -> Symbol { + self.def_names[id.index()] + } + + /// Get the name string for a DefId. + pub fn def_name<'a>(&self, interner: &'a Interner, id: DefId) -> &'a str { + interner.resolve(self.def_names[id.index()]) } /// Number of definitions. pub fn def_count(&self) -> usize { self.def_names.len() } + + /// Get the def_names slice (for seeding TypeContext). + pub fn def_names(&self) -> &[Symbol] { + &self.def_names + } + + /// Get the name_to_def map (for seeding TypeContext). + pub fn name_to_def(&self) -> &HashMap { + &self.name_to_def + } } /// Analyze dependencies between definitions. /// /// Returns the SCCs in reverse topological order, with DefId mappings. -pub fn analyze_dependencies(symbol_table: &SymbolTable) -> DependencyAnalysis { +/// The interner is used to intern definition names as Symbols. 
+pub fn analyze_dependencies( + symbol_table: &SymbolTable, + interner: &mut Interner, +) -> DependencyAnalysis { let sccs = SccFinder::find(symbol_table); // Assign DefIds in SCC order (leaves first, so dependencies get lower IDs) @@ -69,9 +100,10 @@ pub fn analyze_dependencies(symbol_table: &SymbolTable) -> DependencyAnalysis { for scc in &sccs { for name in scc { + let sym = interner.intern(name); let def_id = DefId::from_raw(def_names.len() as u32); - name_to_def.insert(name.clone(), def_id); - def_names.push(name.clone()); + name_to_def.insert(sym, def_id); + def_names.push(sym); } } diff --git a/crates/plotnik-lib/src/query/dump.rs b/crates/plotnik-lib/src/query/dump.rs index a7f02dca..6b5c5d17 100644 --- a/crates/plotnik-lib/src/query/dump.rs +++ b/crates/plotnik-lib/src/query/dump.rs @@ -38,7 +38,7 @@ mod test_helpers { } pub fn emit_typescript(&self) -> String { - crate::query::type_check::emit_typescript(self.type_context()) + crate::query::type_check::emit_typescript(self.type_context(), self.interner()) } } } diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index c3969f2c..9e8ba7d5 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -4,7 +4,7 @@ use std::ops::{Deref, DerefMut}; use indexmap::IndexMap; -use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_core::{Interner, NodeFieldId, NodeTypeId}; use plotnik_langs::Lang; use crate::Diagnostics; @@ -105,10 +105,13 @@ impl QueryParsed { impl QueryParsed { pub fn analyze(mut self) -> QueryAnalyzed { + // Create shared interner for all phases + let mut interner = Interner::new(); + // Use reference-based structures for processing let symbol_table = resolve_names(&self.source_map, &self.ast_map, &mut self.diag); - let dependency_analysis = dependencies::analyze_dependencies(&symbol_table); + let dependency_analysis = dependencies::analyze_dependencies(&symbol_table, &mut interner); dependencies::validate_recursion( 
&dependency_analysis, &self.ast_map, @@ -119,16 +122,18 @@ impl QueryParsed { // Legacy arity table (to be removed once type_check is fully integrated) let arity_table = infer_arities(&self.ast_map, &symbol_table, &mut self.diag); - // New unified type checking pass + // New unified type checking pass - receives mutable interner reference let type_context = type_check::infer_types( &self.ast_map, &symbol_table, &dependency_analysis, &mut self.diag, + &mut interner, ); QueryAnalyzed { query_parsed: self, + interner, symbol_table, arity_table, type_context, @@ -152,6 +157,7 @@ pub type Query = QueryAnalyzed; pub struct QueryAnalyzed { query_parsed: QueryParsed, + interner: Interner, pub symbol_table: SymbolTable, arity_table: ExprArityTable, type_context: TypeContext, @@ -170,6 +176,10 @@ impl QueryAnalyzed { &self.type_context } + pub fn interner(&self) -> &Interner { + &self.interner + } + pub fn link(mut self, lang: &Lang) -> LinkedQuery { // Use reference-based hash maps during processing let mut type_ids: HashMap<&str, Option> = HashMap::new(); @@ -236,6 +246,12 @@ pub struct LinkedQuery { field_ids: NodeFieldIdTableOwned, } +impl LinkedQuery { + pub fn interner(&self) -> &Interner { + &self.inner.interner + } +} + impl Deref for LinkedQuery { type Target = QueryAnalyzed; diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index beb7d098..1bb066b4 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -1,13 +1,15 @@ //! TypeContext: manages interned types, symbols, and term info cache. //! //! Types are interned to enable cheap equality checks and cycle handling. -//! Symbols are interned to enable cheap string comparison. +//! Symbols are stored but resolved via external Interner reference. //! TermInfo is cached per-expression to avoid recomputation. 
-use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use crate::parser::ast::Expr; +use std::collections::HashMap; + use super::symbol::{DefId, Interner, Symbol}; use super::types::{ Arity, FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind, @@ -16,8 +18,6 @@ use super::types::{ /// Central registry for types, symbols, and expression metadata. #[derive(Debug, Clone)] pub struct TypeContext { - /// String interner for field/type names - interner: Interner, /// Interned types by ID types: Vec, /// Deduplication map for type interning @@ -41,7 +41,6 @@ impl Default for TypeContext { impl TypeContext { pub fn new() -> Self { let mut ctx = Self { - interner: Interner::new(), types: Vec::new(), type_map: HashMap::new(), term_info: HashMap::new(), @@ -63,30 +62,11 @@ impl TypeContext { ctx } - // ========== Symbol interning ========== - - /// Intern a string, returning its Symbol. - #[inline] - pub fn intern(&mut self, s: &str) -> Symbol { - self.interner.intern(s) - } - - /// Intern an owned string. - #[inline] - pub fn intern_owned(&mut self, s: String) -> Symbol { - self.interner.intern_owned(s) - } - - /// Resolve a Symbol back to its string. - #[inline] - pub fn resolve(&self, sym: Symbol) -> &str { - self.interner.resolve(sym) - } - - /// Get a reference to the interner (for emission, etc.). - #[inline] - pub fn interner(&self) -> &Interner { - &self.interner + /// Seed definition mappings from DependencyAnalysis. + /// This avoids re-registering definitions that were already assigned DefIds. + pub fn seed_defs(&mut self, def_names: &[Symbol], name_to_def: &HashMap) { + self.def_names = def_names.to_vec(); + self.def_ids = name_to_def.clone(); } // ========== Type interning ========== @@ -150,8 +130,19 @@ impl TypeContext { /// Register a definition by name, returning its DefId. /// If already registered, returns existing DefId. 
- pub fn register_def(&mut self, name: &str) -> DefId { - let sym = self.interner.intern(name); + pub fn register_def(&mut self, interner: &mut Interner, name: &str) -> DefId { + let sym = interner.intern(name); + if let Some(&def_id) = self.def_ids.get(&sym) { + return def_id; + } + let def_id = DefId::from_raw(self.def_names.len() as u32); + self.def_names.push(sym); + self.def_ids.insert(sym, def_id); + def_id + } + + /// Register a definition by pre-interned Symbol, returning its DefId. + pub fn register_def_sym(&mut self, sym: Symbol) -> DefId { if let Some(&def_id) = self.def_ids.get(&sym) { return def_id; } @@ -161,11 +152,16 @@ impl TypeContext { def_id } - /// Get DefId for a definition name. - pub fn get_def_id(&self, name: &str) -> Option { - // Need to check if interned, avoid creating new symbol + /// Get DefId for a definition by Symbol. + pub fn get_def_id_sym(&self, sym: Symbol) -> Option { + self.def_ids.get(&sym).copied() + } + + /// Get DefId for a definition name (requires interner for lookup). + pub fn get_def_id(&self, interner: &Interner, name: &str) -> Option { + // Linear scan - only used during analysis, not hot path for (&sym, &def_id) in &self.def_ids { - if self.interner.resolve(sym) == name { + if interner.resolve(sym) == name { return Some(def_id); } } @@ -178,8 +174,8 @@ impl TypeContext { } /// Get the name string for a DefId. - pub fn def_name(&self, def_id: DefId) -> &str { - self.resolve(self.def_names[def_id.index()]) + pub fn def_name<'a>(&self, interner: &'a Interner, def_id: DefId) -> &'a str { + interner.resolve(self.def_names[def_id.index()]) } // ========== Definition types ========== @@ -191,8 +187,8 @@ impl TypeContext { /// Register the output type for a definition by string name. /// Registers the def if not already known. 
- pub fn set_def_type_by_name(&mut self, name: &str, type_id: TypeId) { - let def_id = self.register_def(name); + pub fn set_def_type_by_name(&mut self, interner: &mut Interner, name: &str, type_id: TypeId) { + let def_id = self.register_def(interner, name); self.def_types.insert(def_id, type_id); } @@ -202,8 +198,8 @@ impl TypeContext { } /// Get the output type for a definition by string name. - pub fn get_def_type_by_name(&self, name: &str) -> Option { - self.get_def_id(name) + pub fn get_def_type_by_name(&self, interner: &Interner, name: &str) -> Option { + self.get_def_id(interner, name) .and_then(|id| self.def_types.get(&id).copied()) } @@ -270,8 +266,9 @@ mod tests { #[test] fn struct_types_intern_correctly() { let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); - let x_sym = ctx.intern("x"); + let x_sym = interner.intern("x"); let mut fields = BTreeMap::new(); fields.insert(x_sym, FieldInfo::required(TYPE_NODE)); @@ -283,47 +280,53 @@ mod tests { #[test] fn symbol_interning_works() { - let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); - let a = ctx.intern("foo"); - let b = ctx.intern("foo"); - let c = ctx.intern("bar"); + let a = interner.intern("foo"); + let b = interner.intern("foo"); + let c = interner.intern("bar"); assert_eq!(a, b); assert_ne!(a, c); - assert_eq!(ctx.resolve(a), "foo"); - assert_eq!(ctx.resolve(c), "bar"); + assert_eq!(interner.resolve(a), "foo"); + assert_eq!(interner.resolve(c), "bar"); } #[test] fn def_type_by_name() { let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); - ctx.set_def_type_by_name("Query", TYPE_NODE); - assert_eq!(ctx.get_def_type_by_name("Query"), Some(TYPE_NODE)); - assert_eq!(ctx.get_def_type_by_name("Missing"), None); + ctx.set_def_type_by_name(&mut interner, "Query", TYPE_NODE); + assert_eq!( + ctx.get_def_type_by_name(&interner, "Query"), + Some(TYPE_NODE) + ); + assert_eq!(ctx.get_def_type_by_name(&interner, "Missing"), None); } #[test] fn 
register_def_returns_stable_id() { let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); - let id1 = ctx.register_def("Foo"); - let id2 = ctx.register_def("Bar"); - let id3 = ctx.register_def("Foo"); // duplicate + let id1 = ctx.register_def(&mut interner, "Foo"); + let id2 = ctx.register_def(&mut interner, "Bar"); + let id3 = ctx.register_def(&mut interner, "Foo"); // duplicate assert_eq!(id1, id3); assert_ne!(id1, id2); - assert_eq!(ctx.def_name(id1), "Foo"); - assert_eq!(ctx.def_name(id2), "Bar"); + assert_eq!(ctx.def_name(&interner, id1), "Foo"); + assert_eq!(ctx.def_name(&interner, id2), "Bar"); } #[test] fn def_id_lookup() { let mut ctx = TypeContext::new(); + let mut interner = Interner::new(); - ctx.register_def("Query"); - assert!(ctx.get_def_id("Query").is_some()); - assert!(ctx.get_def_id("Missing").is_none()); + ctx.register_def(&mut interner, "Query"); + assert!(ctx.get_def_id(&interner, "Query").is_some()); + assert!(ctx.get_def_id(&interner, "Missing").is_none()); } } diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index f2028014..55f128c2 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -5,6 +5,8 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use plotnik_core::Interner; + use super::context::TypeContext; use super::symbol::Symbol; use super::types::{FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TypeId, TypeKind}; @@ -43,6 +45,7 @@ impl Default for EmitConfig { /// TypeScript emitter. 
pub struct TsEmitter<'a> { ctx: &'a TypeContext, + interner: &'a Interner, config: EmitConfig, /// Generated type names, to avoid collisions used_names: BTreeSet, @@ -58,9 +61,10 @@ pub struct TsEmitter<'a> { } impl<'a> TsEmitter<'a> { - pub fn new(ctx: &'a TypeContext, config: EmitConfig) -> Self { + pub fn new(ctx: &'a TypeContext, interner: &'a Interner, config: EmitConfig) -> Self { Self { ctx, + interner, config, used_names: BTreeSet::new(), type_names: HashMap::new(), @@ -87,7 +91,12 @@ impl<'a> TsEmitter<'a> { let def_names: HashMap = self .ctx .iter_def_types() - .map(|(def_id, type_id)| (type_id, self.ctx.def_name(def_id).to_string())) + .map(|(def_id, type_id)| { + ( + type_id, + self.ctx.def_name(self.interner, def_id).to_string(), + ) + }) .collect(); // Compute topological order (leaves first) @@ -100,7 +109,7 @@ impl<'a> TsEmitter<'a> { } else if let Some(name) = self.type_names.get(&type_id).cloned() { self.emit_nested_type(type_id, &name); } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(type_id) { - self.emit_custom_type_alias(self.ctx.resolve(*sym)); + self.emit_custom_type_alias(self.interner.resolve(*sym)); } } @@ -125,7 +134,7 @@ impl<'a> TsEmitter<'a> { if let Some(nested_name) = self.type_names.get(nested_id).cloned() { self.emit_nested_type(*nested_id, &nested_name); } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(*nested_id) { - self.emit_custom_type_alias(self.ctx.resolve(*sym)); + self.emit_custom_type_alias(self.interner.resolve(*sym)); } } } @@ -278,7 +287,7 @@ impl<'a> TsEmitter<'a> { fn collect_type_names_with_context(&mut self) { // Reserve definition names first for (def_id, _) in self.ctx.iter_def_types() { - let name = self.ctx.def_name(def_id); + let name = self.ctx.def_name(self.interner, def_id); let pascal_name = to_pascal_case(name); self.used_names.insert(pascal_name); } @@ -287,7 +296,7 @@ impl<'a> TsEmitter<'a> { let mut type_contexts: HashMap = HashMap::new(); for (def_id, type_id) in 
self.ctx.iter_def_types() { - let def_name = self.ctx.def_name(def_id); + let def_name = self.ctx.def_name(self.interner, def_id); self.collect_contexts_for_type( type_id, &NamingContext { @@ -331,7 +340,7 @@ impl<'a> TsEmitter<'a> { contexts.entry(type_id).or_insert_with(|| ctx.clone()); // Recurse into fields for (&field_sym, info) in fields { - let field_name = self.ctx.resolve(field_sym); + let field_name = self.interner.resolve(field_sym); let field_ctx = NamingContext { def_name: ctx.def_name.clone(), field_name: Some(field_name.to_string()), @@ -502,10 +511,10 @@ impl<'a> TsEmitter<'a> { // Sort fields by resolved name for deterministic output let mut sorted_fields: Vec<_> = fields.iter().collect(); - sorted_fields.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + sorted_fields.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); for (&field_sym, info) in sorted_fields { - let field_name = self.ctx.resolve(field_sym); + let field_name = self.interner.resolve(field_sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" 
} else { "" }; self.output @@ -520,10 +529,10 @@ impl<'a> TsEmitter<'a> { // Sort variants by resolved name for deterministic output let mut sorted_variants: Vec<_> = variants.iter().collect(); - sorted_variants.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + sorted_variants.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); for (&variant_sym, &type_id) in sorted_variants { - let variant_name = self.ctx.resolve(variant_sym); + let variant_name = self.interner.resolve(variant_sym); let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); variant_types.push(variant_type_name.clone()); @@ -595,8 +604,8 @@ impl<'a> TsEmitter<'a> { TypeKind::Void => "void".to_string(), TypeKind::Node => "Node".to_string(), TypeKind::String => "string".to_string(), - TypeKind::Custom(sym) => self.ctx.resolve(*sym).to_string(), - TypeKind::Ref(def_id) => to_pascal_case(self.ctx.def_name(*def_id)), + TypeKind::Custom(sym) => self.interner.resolve(*sym).to_string(), + TypeKind::Ref(def_id) => to_pascal_case(self.ctx.def_name(self.interner, *def_id)), TypeKind::Struct(fields) => { if let Some(name) = self.type_names.get(&type_id) { @@ -637,12 +646,12 @@ impl<'a> TsEmitter<'a> { // Sort fields by resolved name for deterministic output let mut sorted_fields: Vec<_> = fields.iter().collect(); - sorted_fields.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + sorted_fields.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); let field_strs: Vec<_> = sorted_fields .iter() .map(|&(&sym, info)| { - let name = self.ctx.resolve(sym); + let name = self.interner.resolve(sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" 
} else { "" }; format!("{}{}: {}", name, optional, ts_type) @@ -655,12 +664,12 @@ impl<'a> TsEmitter<'a> { fn inline_enum(&self, variants: &BTreeMap) -> String { // Sort variants by resolved name for deterministic output let mut sorted_variants: Vec<_> = variants.iter().collect(); - sorted_variants.sort_by_key(|&(&sym, _)| self.ctx.resolve(sym)); + sorted_variants.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); let variant_strs: Vec<_> = sorted_variants .iter() .map(|&(&sym, &type_id)| { - let name = self.ctx.resolve(sym); + let name = self.interner.resolve(sym); let data_type = self.type_to_ts(type_id); format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type) }) @@ -690,13 +699,17 @@ fn to_pascal_case(s: &str) -> String { } /// Convenience function to emit TypeScript from a TypeContext. -pub fn emit_typescript(ctx: &TypeContext) -> String { - TsEmitter::new(ctx, EmitConfig::default()).emit() +pub fn emit_typescript(ctx: &TypeContext, interner: &Interner) -> String { + TsEmitter::new(ctx, interner, EmitConfig::default()).emit() } /// Emit TypeScript with custom configuration. 
-pub fn emit_typescript_with_config(ctx: &TypeContext, config: EmitConfig) -> String { - TsEmitter::new(ctx, config).emit() +pub fn emit_typescript_with_config( + ctx: &TypeContext, + interner: &Interner, + config: EmitConfig, +) -> String { + TsEmitter::new(ctx, interner, config).emit() } #[cfg(test)] @@ -716,18 +729,20 @@ mod tests { fn emit_node_type_only_when_referenced() { // Empty context - Node should not be emitted let ctx = TypeContext::new(); - let output = TsEmitter::new(&ctx, EmitConfig::default()).emit(); + let interner = Interner::new(); + let output = TsEmitter::new(&ctx, &interner, EmitConfig::default()).emit(); assert!(!output.contains("interface Node")); // Context with a definition using Node - should emit Node let mut ctx = TypeContext::new(); - let x_sym = ctx.intern("x"); + let mut interner = Interner::new(); + let x_sym = interner.intern("x"); let mut fields = BTreeMap::new(); fields.insert(x_sym, FieldInfo::required(TYPE_NODE)); let struct_id = ctx.intern_type(TypeKind::Struct(fields)); - ctx.set_def_type_by_name("Q", struct_id); + ctx.set_def_type_by_name(&mut interner, "Q", struct_id); - let output = TsEmitter::new(&ctx, EmitConfig::default()).emit(); + let output = TsEmitter::new(&ctx, &interner, EmitConfig::default()).emit(); assert!(output.contains("interface Node")); assert!(output.contains("kind: string")); } diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 8d2ecfa8..2b0b26f4 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -5,6 +5,8 @@ use std::collections::BTreeMap; +use plotnik_core::Interner; + use super::symbol::Symbol; use rowan::TextRange; @@ -28,6 +30,7 @@ use super::unify::{UnifyError, unify_flows}; /// Inference context for a single pass over the AST. 
pub struct InferenceVisitor<'a, 'd> { pub ctx: &'a mut TypeContext, + pub interner: &'a mut Interner, pub symbol_table: &'a SymbolTable, pub diag: &'d mut Diagnostics, pub source_id: SourceId, @@ -36,12 +39,14 @@ pub struct InferenceVisitor<'a, 'd> { impl<'a, 'd> InferenceVisitor<'a, 'd> { pub fn new( ctx: &'a mut TypeContext, + interner: &'a mut Interner, symbol_table: &'a SymbolTable, diag: &'d mut Diagnostics, source_id: SourceId, ) -> Self { Self { ctx, + interner, symbol_table, diag, source_id, @@ -159,7 +164,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { DiagnosticKind::DuplicateCaptureInScope, child.text_range(), ) - .message(self.ctx.resolve(name)) + .message(self.interner.resolve(name)) .emit(); } } @@ -196,7 +201,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let Some(label) = branch.label() else { continue; }; - let label_sym = self.ctx.intern(label.text()); + let label_sym = self.interner.intern(label.text()); let Some(body) = branch.body() else { // Empty variant gets void/empty struct type @@ -260,7 +265,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { .map(|inner| self.infer_expr(&inner)) .unwrap_or_else(TermInfo::void); }; - let capture_name = self.ctx.intern(name_tok.text()); + let capture_name = self.interner.intern(name_tok.text()); // Check for type annotation let annotation_type = cap.type_annotation().and_then(|t| { @@ -269,7 +274,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { if type_name == "string" { TYPE_STRING } else { - let type_sym = self.ctx.intern(type_name); + let type_sym = self.interner.intern(type_name); self.ctx.intern_type(TypeKind::Custom(type_sym)) } }) @@ -467,7 +472,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let capture_names: Vec<_> = fields .keys() - .map(|s| format!("`@{}`", self.ctx.resolve(*s))) + .map(|s| format!("`@{}`", self.interner.resolve(*s))) .collect(); let captures_str = capture_names.join(", "); @@ -515,15 +520,15 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { ), UnifyError::IncompatibleTypes { field } => ( 
DiagnosticKind::IncompatibleCaptureTypes, - self.ctx.resolve(*field).to_string(), + self.interner.resolve(*field).to_string(), ), UnifyError::IncompatibleStructs { field } => ( DiagnosticKind::IncompatibleStructShapes, - self.ctx.resolve(*field).to_string(), + self.interner.resolve(*field).to_string(), ), UnifyError::IncompatibleArrayElements { field } => ( DiagnosticKind::IncompatibleCaptureTypes, - self.ctx.resolve(*field).to_string(), + self.interner.resolve(*field).to_string(), ), }; @@ -560,11 +565,12 @@ impl Visitor for InferenceVisitor<'_, '_> { /// Run inference on all definitions in a root. pub fn infer_root( ctx: &mut TypeContext, + interner: &mut Interner, symbol_table: &SymbolTable, diag: &mut Diagnostics, source_id: SourceId, root: &Root, ) { - let mut visitor = InferenceVisitor::new(ctx, symbol_table, diag, source_id); + let mut visitor = InferenceVisitor::new(ctx, interner, symbol_table, diag, source_id); visitor.visit(root); } diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index f6a4cbc3..eddc8232 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -42,9 +42,16 @@ pub fn infer_types( symbol_table: &SymbolTable, dependency_analysis: &DependencyAnalysis, diag: &mut Diagnostics, + interner: &mut Interner, ) -> TypeContext { let mut ctx = TypeContext::new(); + // Seed def mappings from DependencyAnalysis (avoids re-registration) + ctx.seed_defs( + dependency_analysis.def_names(), + dependency_analysis.name_to_def(), + ); + // Process definitions in SCC order (leaves first) for scc in &dependency_analysis.sccs { for def_name in scc { @@ -58,21 +65,21 @@ pub fn infer_types( }; // Run inference on this root - infer_root(&mut ctx, symbol_table, diag, source_id, root); + infer_root(&mut ctx, interner, symbol_table, diag, source_id, root); // Register the definition's output type if let Some(body) = symbol_table.get(def_name) && let 
Some(info) = ctx.get_term_info(body).cloned() { let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type_by_name(def_name, type_id); + ctx.set_def_type_by_name(interner, def_name, type_id); } } } // Handle any definitions not in an SCC (shouldn't happen, but be safe) for (name, source_id, _body) in symbol_table.iter_full() { - if ctx.get_def_type_by_name(name).is_some() { + if ctx.get_def_type_by_name(interner, name).is_some() { continue; } @@ -80,13 +87,13 @@ pub fn infer_types( continue; }; - infer_root(&mut ctx, symbol_table, diag, source_id, root); + infer_root(&mut ctx, interner, symbol_table, diag, source_id, root); if let Some(body) = symbol_table.get(name) && let Some(info) = ctx.get_term_info(body).cloned() { let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type_by_name(name, type_id); + ctx.set_def_type_by_name(interner, name, type_id); } } diff --git a/crates/plotnik-lib/src/query/type_check/symbol.rs b/crates/plotnik-lib/src/query/type_check/symbol.rs index fec5211c..8feadf41 100644 --- a/crates/plotnik-lib/src/query/type_check/symbol.rs +++ b/crates/plotnik-lib/src/query/type_check/symbol.rs @@ -1,40 +1,9 @@ -//! Symbol interning for field and type names, plus definition identifiers. -//! -//! Converts heap-allocated strings into cheap integer handles. -//! Comparing two symbols is O(1) integer comparison. +//! Symbol interning and definition identifiers. //! +//! `Symbol` and `Interner` are re-exported from `plotnik_core`. //! `DefId` identifies named definitions (like `Foo = ...`) by stable index. -use std::collections::HashMap; - -/// A lightweight handle to an interned string. -/// -/// Comparing two symbols is O(1). Symbols are ordered by insertion order, -/// not lexicographically—use `Interner::resolve` if you need string ordering. -#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] -pub struct Symbol(u32); - -impl Symbol { - /// Raw index for serialization/debugging. 
- #[inline] - pub fn as_u32(self) -> u32 { - self.0 - } -} - -// Implement Ord based on raw index (insertion order). -// For deterministic output, sort by resolved string when needed. -impl PartialOrd for Symbol { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for Symbol { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.0.cmp(&other.0) - } -} +pub use plotnik_core::{Interner, Symbol}; /// A lightweight handle to a named definition. /// @@ -63,130 +32,10 @@ impl DefId { } } -/// String interner. Deduplicates strings and returns cheap Symbol handles. -#[derive(Debug, Clone, Default)] -pub struct Interner { - /// Map from string to symbol for deduplication. - map: HashMap, - /// Storage for interned strings, indexed by Symbol. - strings: Vec, -} - -impl Interner { - pub fn new() -> Self { - Self::default() - } - - /// Intern a string, returning its Symbol. - /// If the string was already interned, returns the existing Symbol. - pub fn intern(&mut self, s: &str) -> Symbol { - if let Some(&sym) = self.map.get(s) { - return sym; - } - - let sym = Symbol(self.strings.len() as u32); - self.strings.push(s.to_owned()); - self.map.insert(s.to_owned(), sym); - sym - } - - /// Intern an owned string, avoiding clone if not already present. - pub fn intern_owned(&mut self, s: String) -> Symbol { - if let Some(&sym) = self.map.get(&s) { - return sym; - } - - let sym = Symbol(self.strings.len() as u32); - self.strings.push(s.clone()); - self.map.insert(s, sym); - sym - } - - /// Resolve a Symbol back to its string. - /// - /// # Panics - /// Panics if the symbol was not created by this interner. - #[inline] - pub fn resolve(&self, sym: Symbol) -> &str { - &self.strings[sym.0 as usize] - } - - /// Try to resolve a Symbol, returning None if invalid. - #[inline] - pub fn try_resolve(&self, sym: Symbol) -> Option<&str> { - self.strings.get(sym.0 as usize).map(|s| s.as_str()) - } - - /// Number of interned strings. 
- #[inline] - pub fn len(&self) -> usize { - self.strings.len() - } - - /// Whether the interner is empty. - #[inline] - pub fn is_empty(&self) -> bool { - self.strings.is_empty() - } -} - #[cfg(test)] mod tests { use super::*; - #[test] - fn intern_deduplicates() { - let mut interner = Interner::new(); - - let a = interner.intern("foo"); - let b = interner.intern("foo"); - let c = interner.intern("bar"); - - assert_eq!(a, b); - assert_ne!(a, c); - assert_eq!(interner.len(), 2); - } - - #[test] - fn resolve_roundtrip() { - let mut interner = Interner::new(); - - let sym = interner.intern("hello"); - assert_eq!(interner.resolve(sym), "hello"); - } - - #[test] - fn intern_owned_avoids_clone_on_hit() { - let mut interner = Interner::new(); - - let a = interner.intern("test"); - let b = interner.intern_owned("test".to_string()); - - assert_eq!(a, b); - assert_eq!(interner.len(), 1); - } - - #[test] - fn symbols_are_copy() { - let mut interner = Interner::new(); - let sym = interner.intern("x"); - - // Symbol is Copy, so this should work without move - let copy = sym; - assert_eq!(sym, copy); - } - - #[test] - fn symbol_ordering_is_insertion_order() { - let mut interner = Interner::new(); - - let z = interner.intern("z"); - let a = interner.intern("a"); - - // z was inserted first, so z < a by insertion order - assert!(z < a); - } - #[test] fn def_id_roundtrip() { let id = DefId::from_raw(42); diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs index f423b1fc..b5ffd207 100644 --- a/crates/plotnik-lib/src/query/type_check/unify.rs +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -152,6 +152,7 @@ fn unify_type_ids(a: TypeId, b: TypeId, field: Symbol) -> Result Date: Mon, 22 Dec 2025 15:53:24 -0300 Subject: [PATCH 10/18] Refactor --- crates/plotnik-lib/src/query/expr_arity.rs | 224 -------- .../plotnik-lib/src/query/expr_arity_tests.rs | 523 ------------------ crates/plotnik-lib/src/query/link.rs | 39 +- 
crates/plotnik-lib/src/query/mod.rs | 3 - crates/plotnik-lib/src/query/printer.rs | 7 +- crates/plotnik-lib/src/query/query.rs | 77 +-- crates/plotnik-lib/src/query/query_tests.rs | 11 - .../src/query/type_check/context.rs | 2 +- .../plotnik-lib/src/query/type_check/mod.rs | 2 +- 9 files changed, 83 insertions(+), 805 deletions(-) delete mode 100644 crates/plotnik-lib/src/query/expr_arity.rs delete mode 100644 crates/plotnik-lib/src/query/expr_arity_tests.rs diff --git a/crates/plotnik-lib/src/query/expr_arity.rs b/crates/plotnik-lib/src/query/expr_arity.rs deleted file mode 100644 index cc6a4034..00000000 --- a/crates/plotnik-lib/src/query/expr_arity.rs +++ /dev/null @@ -1,224 +0,0 @@ -//! Expression arity analysis for query expressions. -//! -//! Determines whether an expression matches a single node position (`One`) -//! or multiple sequential positions (`Many`). Used to validate field constraints: -//! `field: expr` requires `expr` to have `ExprArity::One`. -//! -//! `Invalid` marks nodes where arity cannot be determined (error nodes, -//! undefined refs, etc.). 
- -use std::collections::HashMap; - -use super::query::AstMap; -use super::source_map::SourceId; -use super::symbol_table::SymbolTable; -use super::visitor::{Visitor, walk_expr, walk_field_expr}; -use crate::diagnostics::{DiagnosticKind, Diagnostics}; -use crate::parser::{Expr, FieldExpr, Ref, SeqExpr, SyntaxKind, SyntaxNode, ast}; - -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum ExprArity { - One, - Many, - Invalid, -} - -pub type ExprArityTable = HashMap; - -pub fn infer_arities( - ast_map: &AstMap, - symbol_table: &SymbolTable, - diag: &mut Diagnostics, -) -> ExprArityTable { - let mut arity_table = ExprArityTable::default(); - - for (&source_id, root) in ast_map { - let ctx = ArityContext { - symbol_table, - arity_table, - diag, - source_id, - }; - let mut computer = ArityComputer { ctx }; - computer.visit(root); - arity_table = computer.ctx.arity_table; - } - - for (&source_id, root) in ast_map { - let ctx = ArityContext { - symbol_table, - arity_table, - diag, - source_id, - }; - let mut validator = ArityValidator { ctx }; - validator.visit(root); - arity_table = validator.ctx.arity_table; - } - - arity_table -} - -pub fn resolve_arity(node: &SyntaxNode, table: &ExprArityTable) -> Option { - if node.kind() == SyntaxKind::Error { - return Some(ExprArity::Invalid); - } - - // Try casting to Expr first as it's the most common query - if let Some(expr) = ast::Expr::cast(node.clone()) { - return table.get(&expr).copied(); - } - - // Root: arity based on definition count - if let Some(root) = ast::Root::cast(node.clone()) { - return Some(if root.defs().nth(1).is_some() { - ExprArity::Many - } else { - ExprArity::One - }); - } - - // Def: delegate to body's arity - if let Some(def) = ast::Def::cast(node.clone()) { - return def.body().and_then(|b| table.get(&b).copied()); - } - - // Branch: delegate to body's arity - if let Some(branch) = ast::Branch::cast(node.clone()) { - return branch.body().and_then(|b| table.get(&b).copied()); - } - - None -} - 
-struct ArityContext<'a, 'd> { - symbol_table: &'a SymbolTable, - arity_table: ExprArityTable, - diag: &'d mut Diagnostics, - source_id: SourceId, -} - -impl ArityContext<'_, '_> { - fn compute_arity(&mut self, expr: &Expr) -> ExprArity { - if let Some(&c) = self.arity_table.get(expr) { - return c; - } - // Insert sentinel to break cycles (e.g., `Foo = (Foo)`) - self.arity_table.insert(expr.clone(), ExprArity::Invalid); - - let c = self.compute_single_arity(expr); - self.arity_table.insert(expr.clone(), c); - c - } - - fn compute_single_arity(&mut self, expr: &Expr) -> ExprArity { - match expr { - Expr::NamedNode(_) | Expr::AnonymousNode(_) | Expr::FieldExpr(_) | Expr::AltExpr(_) => { - ExprArity::One - } - - Expr::SeqExpr(seq) => self.seq_arity(seq), - - Expr::CapturedExpr(cap) => cap - .inner() - .map(|inner| self.compute_arity(&inner)) - .unwrap_or(ExprArity::Invalid), - - Expr::QuantifiedExpr(q) => q - .inner() - .map(|inner| self.compute_arity(&inner)) - .unwrap_or(ExprArity::Invalid), - - Expr::Ref(r) => self.ref_arity(r), - } - } - - fn seq_arity(&mut self, seq: &SeqExpr) -> ExprArity { - // Avoid collecting into Vec; check if we have 0, 1, or >1 children. 
- let mut children = seq.children(); - - match children.next() { - None => ExprArity::One, - Some(first) => { - if children.next().is_some() { - ExprArity::Many - } else { - self.compute_arity(&first) - } - } - } - } - - fn ref_arity(&mut self, r: &Ref) -> ExprArity { - let name_tok = r.name().expect( - "expr_arities: Ref without name token \ - (parser only creates Ref for PascalCase Id)", - ); - let name = name_tok.text(); - - self.symbol_table - .get(name) - .map(|body| self.compute_arity(body)) - .unwrap_or(ExprArity::Invalid) - } - - fn validate_field(&mut self, field: &FieldExpr) { - let Some(value) = field.value() else { - return; - }; - - let card = self - .arity_table - .get(&value) - .copied() - .unwrap_or(ExprArity::One); - - if card == ExprArity::Many { - let field_name = field - .name() - .map(|t| t.text().to_string()) - .unwrap_or_else(|| "field".to_string()); - - let mut builder = self - .diag - .report( - self.source_id, - DiagnosticKind::FieldSequenceValue, - value.text_range(), - ) - .message(field_name); - - // If value is a reference, add related info pointing to definition - if let Expr::Ref(r) = &value - && let Some(name_tok) = r.name() - && let Some((def_source, def_body)) = self.symbol_table.get_full(name_tok.text()) - { - builder = builder.related_to(def_source, def_body.text_range(), "defined here"); - } - - builder.emit(); - } - } -} - -struct ArityComputer<'a, 'd> { - ctx: ArityContext<'a, 'd>, -} - -impl Visitor for ArityComputer<'_, '_> { - fn visit_expr(&mut self, expr: &Expr) { - self.ctx.compute_arity(expr); - walk_expr(self, expr); - } -} - -struct ArityValidator<'a, 'd> { - ctx: ArityContext<'a, 'd>, -} - -impl Visitor for ArityValidator<'_, '_> { - fn visit_field_expr(&mut self, field: &FieldExpr) { - self.ctx.validate_field(field); - walk_field_expr(self, field); - } -} diff --git a/crates/plotnik-lib/src/query/expr_arity_tests.rs b/crates/plotnik-lib/src/query/expr_arity_tests.rs deleted file mode 100644 index d4f02a58..00000000 
--- a/crates/plotnik-lib/src/query/expr_arity_tests.rs +++ /dev/null @@ -1,523 +0,0 @@ -use crate::Query; -use indoc::indoc; - -#[test] -fn tree_is_one() { - let input = "Q = (identifier)"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ identifier - "); -} - -#[test] -fn singleton_seq_is_one() { - let input = "Q = {(identifier)}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Seq¹ - NamedNode¹ identifier - "); -} - -#[test] -fn nested_singleton_seq_is_one() { - let input = "Q = {{{(identifier)}}}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Seq¹ - Seq¹ - Seq¹ - NamedNode¹ identifier - "); -} - -#[test] -fn multi_seq_is_many() { - let input = "Q = {(a) (b)}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def⁺ Q - Seq⁺ - NamedNode¹ a - NamedNode¹ b - "); -} - -#[test] -fn alt_is_one() { - let input = "Q = [(a) (b)]"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Alt¹ - Branch¹ - NamedNode¹ a - Branch¹ - NamedNode¹ b - "); -} - -#[test] -fn alt_with_seq_branches() { - let input = indoc! {r#" - Q = [{(a) (b)} (c)] - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Alt¹ - Branch⁺ - Seq⁺ - NamedNode¹ a - NamedNode¹ b - Branch¹ - NamedNode¹ c - "); -} - -#[test] -fn ref_to_tree_is_one() { - let input = indoc! {r#" - X = (identifier) - Q = (call (X)) - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root⁺ - Def¹ X - NamedNode¹ identifier - Def¹ Q - NamedNode¹ call - Ref¹ X - "); -} - -#[test] -fn ref_to_seq_is_many() { - let input = indoc! 
{r#" - X = {(a) (b)} - Q = (call (X)) - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root⁺ - Def⁺ X - Seq⁺ - NamedNode¹ a - NamedNode¹ b - Def¹ Q - NamedNode¹ call - Ref⁺ X - "); -} - -#[test] -fn field_with_tree() { - let input = "Q = (call name: (identifier))"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ call - FieldExpr¹ name: - NamedNode¹ identifier - "); -} - -#[test] -fn field_with_alt() { - let input = "Q = (call name: [(identifier) (string)])"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ call - FieldExpr¹ name: - Alt¹ - Branch¹ - NamedNode¹ identifier - Branch¹ - NamedNode¹ string - "); -} - -#[test] -fn field_with_seq_error() { - let input = "Q = (call name: {(a) (b)})"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: field `name` must match exactly one node, not a sequence - | - 1 | Q = (call name: {(a) (b)}) - | ^^^^^^^^^ - - error: field `name` must match exactly one node, not a sequence - | - 1 | Q = (call name: {(a) (b)}) - | ^^^^^^^^^ - "); -} - -#[test] -fn field_with_ref_to_seq_error() { - let input = indoc! 
{r#" - X = {(a) (b)} - Q = (call name: (X)) - "#}; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: field `name` must match exactly one node, not a sequence - | - 1 | X = {(a) (b)} - | --------- defined here - 2 | Q = (call name: (X)) - | ^^^ - - error: field `name` must match exactly one node, not a sequence - | - 1 | X = {(a) (b)} - | --------- defined here - 2 | Q = (call name: (X)) - | ^^^ - "); -} - -#[test] -fn quantifier_preserves_inner_arity() { - let input = "Q = (identifier)*"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - QuantifiedExpr¹ * - NamedNode¹ identifier - "); -} - -#[test] -fn capture_preserves_inner_arity() { - let input = "Q = (identifier) @name"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - CapturedExpr¹ @name - NamedNode¹ identifier - "); -} - -#[test] -fn capture_on_seq() { - let input = "Q = {(a) (b)} @items"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def⁺ Q - CapturedExpr⁺ @items - Seq⁺ - NamedNode¹ a - NamedNode¹ b - "); -} - -#[test] -fn complex_nested_arities() { - let input = indoc! {r#" - Stmt = [(expr_stmt) (return_stmt)] - Q = (function_definition - name: (identifier) @name - body: (block (Stmt)* @stmts)) - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root⁺ - Def¹ Stmt - Alt¹ - Branch¹ - NamedNode¹ expr_stmt - Branch¹ - NamedNode¹ return_stmt - Def¹ Q - NamedNode¹ function_definition - CapturedExpr¹ @name - FieldExpr¹ name: - NamedNode¹ identifier - FieldExpr¹ body: - NamedNode¹ block - CapturedExpr¹ @stmts - QuantifiedExpr¹ * - Ref¹ Stmt - "); -} - -#[test] -fn tagged_alt_arities() { - let input = indoc! 
{r#" - Q = [Ident: (identifier) Num: (number)] - "#}; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Alt¹ - Branch¹ Ident: - NamedNode¹ identifier - Branch¹ Num: - NamedNode¹ number - "); -} - -#[test] -fn anchor_has_no_arity() { - let input = "Q = (block . (statement))"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ block - . - NamedNode¹ statement - "); -} - -#[test] -fn negated_field_has_no_arity() { - let input = "Q = (function !async)"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ function - NegatedField !async - "); -} - -#[test] -fn tree_with_wildcard_type() { - let input = "Q = (_)"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - NamedNode¹ (any) - "); -} - -#[test] -fn bare_wildcard_is_one() { - let input = "Q = _"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - AnonymousNode¹ (any) - "); -} - -#[test] -fn empty_seq_is_one() { - let input = "Q = {}"; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r" - Root¹ - Def¹ Q - Seq¹ - "); -} - -#[test] -fn literal_is_one() { - let input = r#" - Q = "if" - "#; - - let res = Query::expect_valid_arities(input); - - insta::assert_snapshot!(res, @r#" - Root¹ - Def¹ Q - AnonymousNode¹ "if" - "#); -} - -#[test] -fn invalid_error_node() { - let input = "Q = (foo %)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: unexpected token; not valid inside a node — try `(child)` or close with `)` - | - 1 | Q = (foo %) - | ^ - "); -} - -#[test] -fn invalid_undefined_ref() { - let input = "Q = (Undefined)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: `Undefined` is not defined - | - 1 | Q = 
(Undefined) - | ^^^^^^^^^ - "); -} - -#[test] -fn invalid_branch_without_body() { - let input = "Q = [A:]"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: expected an expression; after `Label:` - | - 1 | Q = [A:] - | ^ - "); -} - -#[test] -fn invalid_ref_to_bodyless_def() { - let input = indoc! {r#" - X = % - Q = (X) - "#}; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: expected an expression; after `=` in definition - | - 1 | X = % - | ^ - - error: `X` is not defined - | - 2 | Q = (X) - | ^ - "); -} - -#[test] -fn invalid_capture_without_inner() { - // Error recovery: `extra` is invalid, but `@y` still creates a Capture node - let input = "Q = (call extra @y)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: bare identifier is not a valid expression; wrap in parentheses: `(identifier)` - | - 1 | Q = (call extra @y) - | ^^^^^ - "); -} - -#[test] -fn invalid_capture_without_inner_standalone() { - // Standalone capture without preceding expression - let input = "Q = @x"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: expected an expression; after `=` in definition - | - 1 | Q = @x - | ^ - "); -} - -#[test] -fn invalid_multiple_captures_with_error() { - let input = "Q = (call (Undefined) @x extra @y)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: `Undefined` is not defined - | - 1 | Q = (call (Undefined) @x extra @y) - | ^^^^^^^^^ - - error: bare identifier is not a valid expression; wrap in parentheses: `(identifier)` - | - 1 | Q = (call (Undefined) @x extra @y) - | ^^^^^ - "); -} - -#[test] -fn invalid_quantifier_without_inner() { - // Error recovery: `extra` is invalid, but `*` still creates a Quantifier node - let input = "Q = (foo extra*)"; - - let res = Query::expect_invalid(input); - - insta::assert_snapshot!(res, @r" - error: bare identifier is not 
a valid expression; wrap in parentheses: `(identifier)` - | - 1 | Q = (foo extra*) - | ^^^^^ - "); -} diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index cdd8358a..5a660f01 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -8,10 +8,19 @@ use std::collections::HashMap; use indexmap::IndexSet; -use plotnik_core::{NodeFieldId, NodeTypeId}; +use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; use plotnik_langs::Lang; use rowan::TextRange; +/// Output from the link phase for binary emission. +#[derive(Default)] +pub struct LinkOutput { + /// Interned name → NodeTypeId (for binary: StringId → NodeTypeId) + pub node_type_ids: HashMap, + /// Interned name → NodeFieldId (for binary: StringId → NodeFieldId) + pub node_field_ids: HashMap, +} + use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::parser::ast::{self, Expr, NamedNode}; use crate::parser::cst::{SyntaxKind, SyntaxToken}; @@ -32,18 +41,24 @@ pub fn link<'q>( source_map: &'q SourceMap, lang: &Lang, symbol_table: &SymbolTable, - node_type_ids: &mut HashMap<&'q str, Option>, - node_field_ids: &mut HashMap<&'q str, Option>, + output: &mut LinkOutput, + interner: &mut Interner, diagnostics: &mut Diagnostics, ) { + // Local deduplication maps (not exposed in output) + let mut node_type_ids: HashMap<&'q str, Option> = HashMap::new(); + let mut node_field_ids: HashMap<&'q str, Option> = HashMap::new(); + for (&source_id, root) in ast_map { let mut linker = Linker { source_map, source_id, lang, symbol_table, - node_type_ids, - node_field_ids, + node_type_ids: &mut node_type_ids, + node_field_ids: &mut node_field_ids, + output, + interner, diagnostics, }; linker.link(root); @@ -57,6 +72,8 @@ struct Linker<'a, 'q> { symbol_table: &'a SymbolTable, node_type_ids: &'a mut HashMap<&'q str, Option>, node_field_ids: &'a mut HashMap<&'q str, Option>, + output: &'a mut LinkOutput, + interner: &'a mut Interner, diagnostics: 
&'a mut Diagnostics, } @@ -96,6 +113,10 @@ impl<'a, 'q> Linker<'a, 'q> { let resolved = self.lang.resolve_named_node(type_name); self.node_type_ids .insert(token_src(&type_token, self.source()), resolved); + if let Some(id) = resolved { + let sym = self.interner.intern(type_name); + self.output.node_type_ids.entry(sym).or_insert(id); + } if resolved.is_none() { let all_types = self.lang.all_named_node_kinds(); let max_dist = (type_name.len() / 3).clamp(2, 4); @@ -133,7 +154,9 @@ impl<'a, 'q> Linker<'a, 'q> { let resolved = self.lang.resolve_field(field_name); self.node_field_ids .insert(token_src(&name_token, self.source()), resolved); - if resolved.is_some() { + if let Some(id) = resolved { + let sym = self.interner.intern(field_name); + self.output.node_field_ids.entry(sym).or_insert(id); return; } let all_fields = self.lang.all_field_names(); @@ -406,6 +429,10 @@ impl Visitor for NodeTypeCollector<'_, '_, '_> { self.linker .node_type_ids .insert(token_src(&value_token, self.linker.source()), resolved); + if let Some(id) = resolved { + let sym = self.linker.interner.intern(value); + self.linker.output.node_type_ids.entry(sym).or_insert(id); + } if resolved.is_none() { self.linker diff --git a/crates/plotnik-lib/src/query/mod.rs b/crates/plotnik-lib/src/query/mod.rs index 55778847..3dccff31 100644 --- a/crates/plotnik-lib/src/query/mod.rs +++ b/crates/plotnik-lib/src/query/mod.rs @@ -10,7 +10,6 @@ pub use symbol_table::SymbolTable; pub mod alt_kinds; mod dependencies; -pub mod expr_arity; pub mod link; #[allow(clippy::module_inception)] pub mod query; @@ -22,8 +21,6 @@ pub mod visitor; mod alt_kinds_tests; #[cfg(test)] mod dependencies_tests; -#[cfg(test)] -mod expr_arity_tests; #[cfg(all(test, feature = "plotnik-langs"))] mod link_tests; #[cfg(test)] diff --git a/crates/plotnik-lib/src/query/printer.rs b/crates/plotnik-lib/src/query/printer.rs index 333abc5f..389e8722 100644 --- a/crates/plotnik-lib/src/query/printer.rs +++ 
b/crates/plotnik-lib/src/query/printer.rs @@ -8,8 +8,8 @@ use rowan::NodeOrToken; use crate::parser::{self as ast, Expr, SyntaxNode}; use super::Query; -use super::expr_arity::ExprArity; use super::source_map::SourceKind; +use super::type_check::Arity; pub struct QueryPrinter<'q> { query: &'q Query, @@ -385,9 +385,8 @@ impl<'q> QueryPrinter<'q> { return ""; } match self.query.get_arity(node) { - Some(ExprArity::One) => "¹", - Some(ExprArity::Many) => "⁺", - Some(ExprArity::Invalid) => "⁻", + Some(Arity::One) => "¹", + Some(Arity::Many) => "⁺", None => "ˣ", } } diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 9e8ba7d5..4676a347 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -4,14 +4,13 @@ use std::ops::{Deref, DerefMut}; use indexmap::IndexMap; -use plotnik_core::{Interner, NodeFieldId, NodeTypeId}; +use plotnik_core::{Interner, NodeFieldId, NodeTypeId, Symbol}; use plotnik_langs::Lang; use crate::Diagnostics; use crate::parser::{ParseResult, Parser, Root, SyntaxNode, lexer::lex}; use crate::query::alt_kinds::validate_alt_kinds; use crate::query::dependencies; -use crate::query::expr_arity::{ExprArity, ExprArityTable, infer_arities, resolve_arity}; use crate::query::link; use crate::query::source_map::{SourceId, SourceMap}; use crate::query::symbol_table::{SymbolTable, resolve_names}; @@ -119,10 +118,7 @@ impl QueryParsed { &mut self.diag, ); - // Legacy arity table (to be removed once type_check is fully integrated) - let arity_table = infer_arities(&self.ast_map, &symbol_table, &mut self.diag); - - // New unified type checking pass - receives mutable interner reference + // Unified type checking pass let type_context = type_check::infer_types( &self.ast_map, &symbol_table, @@ -135,7 +131,6 @@ impl QueryParsed { query_parsed: self, interner, symbol_table, - arity_table, type_context, } } @@ -159,7 +154,6 @@ pub struct QueryAnalyzed { query_parsed: QueryParsed, 
interner: Interner, pub symbol_table: SymbolTable, - arity_table: ExprArityTable, type_context: TypeContext, } @@ -168,8 +162,34 @@ impl QueryAnalyzed { !self.diag.has_errors() } - pub fn get_arity(&self, node: &SyntaxNode) -> Option { - resolve_arity(node, &self.arity_table) + pub fn get_arity(&self, node: &SyntaxNode) -> Option { + use crate::parser::ast::{self, Expr}; + + // Try casting to Expr first as it's the most common query + if let Some(expr) = ast::Expr::cast(node.clone()) { + return self.type_context.get_arity(&expr); + } + + // Root: arity based on definition count + if let Some(root) = ast::Root::cast(node.clone()) { + return Some(if root.defs().nth(1).is_some() { + Arity::Many + } else { + Arity::One + }); + } + + // Def: delegate to body's arity + if let Some(def) = ast::Def::cast(node.clone()) { + return def.body().and_then(|b| self.type_context.get_arity(&b)); + } + + // Branch: delegate to body's arity + if let Some(branch) = ast::Branch::cast(node.clone()) { + return branch.body().and_then(|b| self.type_context.get_arity(&b)); + } + + None } pub fn type_context(&self) -> &TypeContext { @@ -181,34 +201,22 @@ impl QueryAnalyzed { } pub fn link(mut self, lang: &Lang) -> LinkedQuery { - // Use reference-based hash maps during processing - let mut type_ids: HashMap<&str, Option> = HashMap::new(); - let mut field_ids: HashMap<&str, Option> = HashMap::new(); + let mut output = link::LinkOutput::default(); link::link( &self.query_parsed.ast_map, &self.query_parsed.source_map, lang, &self.symbol_table, - &mut type_ids, - &mut field_ids, + &mut output, + &mut self.interner, &mut self.query_parsed.diag, ); - // Convert to owned for storage - let type_ids_owned = type_ids - .into_iter() - .map(|(k, v)| (k.to_owned(), v)) - .collect(); - let field_ids_owned = field_ids - .into_iter() - .map(|(k, v)| (k.to_owned(), v)) - .collect(); - LinkedQuery { inner: self, - type_ids: type_ids_owned, - field_ids: field_ids_owned, + node_type_ids: output.node_type_ids, + 
node_field_ids: output.node_field_ids, } } } @@ -237,19 +245,24 @@ impl TryFrom<&str> for QueryAnalyzed { } } -type NodeTypeIdTableOwned = HashMap>; -type NodeFieldIdTableOwned = HashMap>; - pub struct LinkedQuery { inner: QueryAnalyzed, - type_ids: NodeTypeIdTableOwned, - field_ids: NodeFieldIdTableOwned, + node_type_ids: HashMap, + node_field_ids: HashMap, } impl LinkedQuery { pub fn interner(&self) -> &Interner { &self.inner.interner } + + pub fn node_type_ids(&self) -> &HashMap { + &self.node_type_ids + } + + pub fn node_field_ids(&self) -> &HashMap { + &self.node_field_ids + } } impl Deref for LinkedQuery { diff --git a/crates/plotnik-lib/src/query/query_tests.rs b/crates/plotnik-lib/src/query/query_tests.rs index 8e472b30..4d3f3458 100644 --- a/crates/plotnik-lib/src/query/query_tests.rs +++ b/crates/plotnik-lib/src/query/query_tests.rs @@ -141,17 +141,6 @@ fn multifile_field_with_ref_to_seq_error() { assert!(!query.is_valid()); insta::assert_snapshot!(query.dump_diagnostics(), @r" - error: field `name` must match exactly one node, not a sequence - --> main.ptk:1:17 - | - 1 | Q = (call name: (X)) - | ^^^ - | - ::: defs.ptk:1:5 - | - 1 | X = {(a) (b)} - | --------- defined here - error: field `name` must match exactly one node, not a sequence --> main.ptk:1:17 | diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 1bb066b4..92cb8fcb 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -203,7 +203,7 @@ impl TypeContext { .and_then(|id| self.def_types.get(&id).copied()) } - /// Get arity for an expression (for backward compatibility with expr_arity). + /// Get arity for an expression. 
pub fn get_arity(&self, expr: &Expr) -> Option { self.term_info.get(expr).map(|info| info.arity) } diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index eddc8232..2eb15ff7 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -3,7 +3,7 @@ //! Computes both structural arity (for field validation) and data flow types //! (for TypeScript emission) in a single traversal. //! -//! Replaces the previous `expr_arity.rs` with a more comprehensive type system. +//! Provides arity validation and type inference for TypeScript emission. mod context; mod emit_ts; From c54741e7d5b0c3d0e274b6ae199878724f0d8dd5 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 16:32:59 -0300 Subject: [PATCH 11/18] Add support for recursive types in type inference --- .../src/query/type_check/context.rs | 25 +-- .../plotnik-lib/src/query/type_check/infer.rs | 47 ++++- .../plotnik-lib/src/query/type_check/mod.rs | 37 ++++ .../plotnik-lib/src/query/type_check_tests.rs | 177 +++++++++++++++--- 4 files changed, 243 insertions(+), 43 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 92cb8fcb..8d7988d6 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -4,12 +4,10 @@ //! Symbols are stored but resolved via external Interner reference. //! TermInfo is cached per-expression to avoid recomputation. 
-use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap, HashSet}; use crate::parser::ast::Expr; -use std::collections::HashMap; - use super::symbol::{DefId, Interner, Symbol}; use super::types::{ Arity, FieldInfo, TYPE_NODE, TYPE_STRING, TYPE_VOID, TermInfo, TypeId, TypeKind, @@ -30,6 +28,8 @@ pub struct TypeContext { def_names: Vec, /// Symbol → DefId reverse lookup def_ids: HashMap, + /// Definitions that are part of a recursive SCC + recursive_defs: HashSet, } impl Default for TypeContext { @@ -47,6 +47,7 @@ impl TypeContext { def_types: HashMap::new(), def_names: Vec::new(), def_ids: HashMap::new(), + recursive_defs: HashSet::new(), }; // Pre-register builtin types at their expected IDs @@ -69,8 +70,6 @@ impl TypeContext { self.def_ids = name_to_def.clone(); } - // ========== Type interning ========== - /// Intern a type, returning its ID. Deduplicates identical types. pub fn intern_type(&mut self, kind: TypeKind) -> TypeId { if let Some(&id) = self.type_map.get(&kind) { @@ -114,8 +113,6 @@ impl TypeContext { } } - // ========== Term info cache ========== - /// Cache term info for an expression. pub fn set_term_info(&mut self, expr: Expr, info: TermInfo) { self.term_info.insert(expr, info); @@ -126,8 +123,6 @@ impl TypeContext { self.term_info.get(expr) } - // ========== Definition registry ========== - /// Register a definition by name, returning its DefId. /// If already registered, returns existing DefId. pub fn register_def(&mut self, interner: &mut Interner, name: &str) -> DefId { @@ -178,7 +173,15 @@ impl TypeContext { interner.resolve(self.def_names[def_id.index()]) } - // ========== Definition types ========== + /// Mark a definition as recursive. + pub fn mark_recursive(&mut self, def_id: DefId) { + self.recursive_defs.insert(def_id); + } + + /// Check if a definition is recursive. + pub fn is_recursive(&self, def_id: DefId) -> bool { + self.recursive_defs.contains(&def_id) + } /// Register the output type for a definition by DefId. 
pub fn set_def_type(&mut self, def_id: DefId, type_id: TypeId) { @@ -208,8 +211,6 @@ impl TypeContext { self.term_info.get(expr).map(|info| info.arity) } - // ========== Iteration ========== - /// Iterate over all interned types. pub fn iter_types(&self) -> impl Iterator { self.types diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 2b0b26f4..c242ea11 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -111,12 +111,41 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TermInfo::new(Arity::One, TypeFlow::Void) } - /// Reference: transparent - propagate body's flow and arity + /// If expr is (or contains at its core) a recursive Ref, return its Ref type. + fn get_recursive_ref_type(&mut self, expr: &Expr) -> Option { + match expr { + Expr::Ref(r) => { + let name_tok = r.name()?; + let name_sym = self.interner.intern(name_tok.text()); + let def_id = self.ctx.get_def_id_sym(name_sym)?; + if self.ctx.is_recursive(def_id) { + Some(self.ctx.intern_type(TypeKind::Ref(def_id))) + } else { + None + } + } + Expr::QuantifiedExpr(q) => q.inner().and_then(|i| self.get_recursive_ref_type(&i)), + Expr::CapturedExpr(c) => c.inner().and_then(|i| self.get_recursive_ref_type(&i)), + Expr::FieldExpr(f) => f.value().and_then(|v| self.get_recursive_ref_type(&v)), + _ => None, + } + } + + /// Reference: transparent for non-recursive defs, opaque boundary for recursive ones. fn infer_ref(&mut self, r: &Ref) -> TermInfo { let Some(name_tok) = r.name() else { return TermInfo::void(); }; let name = name_tok.text(); + let name_sym = self.interner.intern(name); + + // Recursive refs are opaque boundaries - they match but don't bubble captures. + // The Ref type is created when a recursive ref is captured (in infer_captured_expr). 
+ if let Some(def_id) = self.ctx.get_def_id_sym(name_sym) + && self.ctx.is_recursive(def_id) + { + return TermInfo::new(Arity::One, TypeFlow::Void); + } // Get the body expression for this definition let Some(body) = self.symbol_table.get(name) else { @@ -124,7 +153,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { return TermInfo::void(); }; - // Refs are transparent - propagate body's flow and arity + // Non-recursive refs are transparent - propagate body's flow and arity self.infer_expr(body) } @@ -310,8 +339,13 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { // Transform based on inner's flow let captured_type = match &inner_info.flow { TypeFlow::Void => { - // @name on Void → capture produces Node (or annotated type) - annotation_type.unwrap_or(TYPE_NODE) + // Check if inner is a recursive ref - if so, capture produces Ref type + if let Some(ref_type) = self.get_recursive_ref_type(&inner) { + annotation_type.unwrap_or(ref_type) + } else { + // @name on Void → capture produces Node (or annotated type) + annotation_type.unwrap_or(TYPE_NODE) + } } TypeFlow::Scalar(type_id) => { // @name on Scalar → capture that scalar type @@ -390,9 +424,10 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let flow = match inner_info.flow { TypeFlow::Void => { - // Scalar list: void inner becomes array of Node + // Scalar list: void inner becomes array of Node (or Ref for recursive) + let element = self.get_recursive_ref_type(&inner).unwrap_or(TYPE_NODE); let array_type = self.ctx.intern_type(TypeKind::Array { - element: TYPE_NODE, + element, non_empty: quantifier.is_non_empty(), }); TypeFlow::Scalar(array_type) diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index 2eb15ff7..1b4d1f6c 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -52,6 +52,29 @@ pub fn infer_types( dependency_analysis.name_to_def(), ); + // Mark recursive definitions before inference. 
+ // A def is recursive if it's in an SCC with >1 member, or it references itself. + for scc in &dependency_analysis.sccs { + let is_recursive_scc = if scc.len() > 1 { + true + } else if let Some(name) = scc.first() + && let Some(body) = symbol_table.get(name) + { + body_references_self(body, name) + } else { + false + }; + + if is_recursive_scc { + for def_name in scc { + let sym = interner.intern(def_name); + if let Some(def_id) = ctx.get_def_id_sym(sym) { + ctx.mark_recursive(def_id); + } + } + } + } + // Process definitions in SCC order (leaves first) for scc in &dependency_analysis.sccs { for def_name in scc { @@ -100,6 +123,20 @@ pub fn infer_types( ctx } +/// Check if an expression body contains a reference to the given name. +fn body_references_self(body: &crate::parser::ast::Expr, name: &str) -> bool { + use crate::parser::ast::Ref; + for descendant in body.as_cst().descendants() { + if let Some(r) = Ref::cast(descendant) + && let Some(name_tok) = r.name() + && name_tok.text() == name + { + return true; + } + } + false +} + /// Convert a TypeFlow to a TypeId for storage. 
fn flow_to_type_id(ctx: &mut TypeContext, flow: &TypeFlow) -> TypeId { match flow { diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs index 84046525..492477d1 100644 --- a/crates/plotnik-lib/src/query/type_check_tests.rs +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -4,7 +4,9 @@ use indoc::indoc; #[test] fn capture_single_node() { let input = "Q = (identifier) @name"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -20,7 +22,9 @@ fn capture_single_node() { #[test] fn capture_with_string_annotation() { let input = "Q = (identifier) @name :: string"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Q { name: string; @@ -31,7 +35,9 @@ fn capture_with_string_annotation() { #[test] fn capture_with_custom_type() { let input = "Q = (identifier) @name :: Identifier"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -48,9 +54,10 @@ fn capture_with_custom_type() { #[test] fn named_node_with_field_capture() { - // Child capture should bubble up through named node let input = "Q = (function name: (identifier) @name)"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -65,8 +72,15 @@ fn named_node_with_field_capture() { #[test] fn named_node_multiple_field_captures() { - let input = "Q = (function name: (identifier) @name body: (block) @body)"; + let input = indoc! 
{r#" + Q = (function + name: (identifier) @name + body: (block) @body + ) + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -82,8 +96,14 @@ fn named_node_multiple_field_captures() { #[test] fn nested_named_node_captures() { - let input = "Q = (call function: (member target: (identifier) @target))"; + let input = indoc! {r#" + Q = (call + function: (member target: (identifier) @target) + ) + "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -98,9 +118,10 @@ fn nested_named_node_captures() { #[test] fn scalar_list_zero_or_more() { - // No internal captures → scalar list: Node[] let input = "Q = (decorator)* @decorators"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -115,9 +136,10 @@ fn scalar_list_zero_or_more() { #[test] fn scalar_list_one_or_more() { - // No internal captures → non-empty array: [Node, ...Node[]] let input = "Q = (identifier)+ @names"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -133,9 +155,11 @@ fn scalar_list_one_or_more() { #[test] fn row_list_basic() { let input = indoc! {r#" - Q = {(key) @k (value) @v}* @rows + Q = {(key) @k (value) @v}* @rows "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -156,9 +180,11 @@ fn row_list_basic() { #[test] fn row_list_non_empty() { let input = indoc! {r#" - Q = {(key) @k (value) @v}+ @rows + Q = {(key) @k (value) @v}+ @rows "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -179,7 +205,9 @@ fn row_list_non_empty() { #[test] fn optional_single_capture() { let input = "Q = (decorator)? 
@dec"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -197,6 +225,7 @@ fn optional_group_bubbles_fields() { let input = indoc! {r#" Q = {(modifier) @mod (decorator) @dec}? "#}; + let res = Query::expect_valid_types(input); insta::assert_snapshot!(res, @r" export interface Node { @@ -216,7 +245,9 @@ fn sequence_merges_fields() { let input = indoc! {r#" Q = {(a) @a (b) @b} "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -235,7 +266,9 @@ fn captured_sequence_creates_struct() { let input = indoc! {r#" Q = {(a) @a (b) @b} @row "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -255,9 +288,10 @@ fn captured_sequence_creates_struct() { #[test] fn untagged_alt_same_capture_all_branches() { - // Same capture in all branches → required field let input = "Q = [(a) @x (b) @x]"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -272,9 +306,10 @@ fn untagged_alt_same_capture_all_branches() { #[test] fn untagged_alt_different_captures() { - // Different captures → both optional let input = "Q = [(a) @a (b) @b]"; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -291,12 +326,14 @@ fn untagged_alt_different_captures() { #[test] fn untagged_alt_partial_overlap() { let input = indoc! {r#" - Q = [ - {(a) @x (b) @y} - {(a) @x} - ] + Q = [ + {(a) @x (b) @y} + {(a) @x} + ] "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -313,9 +350,14 @@ fn untagged_alt_partial_overlap() { #[test] fn tagged_alt_basic() { let input = indoc! 
{r#" - Q = [Str: (string) @s Num: (number) @n] + Q = [ + Str: (string) @s + Num: (number) @n + ] "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r#" export interface Node { kind: string; @@ -339,9 +381,14 @@ fn tagged_alt_basic() { #[test] fn tagged_alt_with_type_annotation() { let input = indoc! {r#" - Q = [Str: (string) @s ::string Num: (number) @n] + Q = [ + Str: (string) @s :: string + Num: (number) @n + ] "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r#" export interface Node { kind: string; @@ -364,11 +411,15 @@ fn tagged_alt_with_type_annotation() { #[test] fn tagged_alt_captured() { - // Captured tagged alternation let input = indoc! {r#" - Q = [Str: (string) @s Num: (number) @n] @result + Q = [ + Str: (string) @s + Num: (number) @n + ] @result "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r#" export interface Node { kind: string; @@ -396,12 +447,14 @@ fn tagged_alt_captured() { #[test] fn nested_captured_group() { let input = indoc! {r#" - Q = { - (identifier) @name - {(key) @k (value) @v} @pair - } + Q = { + (identifier) @name + {(key) @k (value) @v} @pair + } "#}; + let res = Query::expect_valid_types(input); + insta::assert_snapshot!(res, @r" export interface Node { kind: string; @@ -423,9 +476,11 @@ fn nested_captured_group() { #[test] fn error_star_with_internal_captures_no_row() { let input = indoc! {r#" - Bad = {(a) @a (b) @b}* + Bad = {(a) @a (b) @b}* "#}; + let res = Query::expect_invalid(input); + insta::assert_snapshot!(res, @r" error: quantifier `*` contains captures (`@a`, `@b`) but no row capture | @@ -439,9 +494,11 @@ fn error_star_with_internal_captures_no_row() { #[test] fn error_plus_with_internal_capture_no_row() { let input = indoc! 
{r#" - Bad = {(c) @c}+ + Bad = {(c) @c}+ "#}; + let res = Query::expect_invalid(input); + insta::assert_snapshot!(res, @r" error: quantifier `+` contains captures (`@c`) but no row capture | @@ -454,11 +511,12 @@ fn error_plus_with_internal_capture_no_row() { #[test] fn error_named_node_with_capture_quantified() { - // (func (id) @name)* has internal capture let input = indoc! {r#" - Bad = (func (identifier) @name)* + Bad = (func (identifier) @name)* "#}; + let res = Query::expect_invalid(input); + insta::assert_snapshot!(res, @r" error: quantifier `*` contains captures (`@name`) but no row capture | @@ -468,3 +526,72 @@ fn error_named_node_with_capture_quantified() { help: wrap as `{...}* @rows` "); } + +#[test] +fn recursive_type_with_alternation() { + let input = indoc! {r#" + Expr = [ + Lit: (number) @value ::string + Binary: (binary_expression + left: (Expr) @left + right: (Expr) @right) + ] + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r#" + export interface ExprBinary { + $tag: "Binary"; + $data: { left: Expr; right: Expr }; + } + + export interface ExprLit { + $tag: "Lit"; + $data: { value: string }; + } + + export type Expr = ExprBinary | ExprLit; + "#); +} + +#[test] +fn recursive_type_optional_self_ref() { + let input = indoc! {r#" + NestedCall = (call_expression + function: [ + (identifier) @name + (NestedCall) @inner + ] + ) + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface NestedCall { + inner?: NestedCall; + name?: Node; + } + "); +} + +#[test] +fn recursive_type_in_quantified_context() { + let input = indoc! 
{r#" + Item = (item (Item)* @children) + "#}; + + let res = Query::expect_valid_types(input); + + insta::assert_snapshot!(res, @r" + export interface Item { + children: Item[]; + } + "); +} From 5adc415199abf20e00f204d478a3a39948a0f86d Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 17:18:33 -0300 Subject: [PATCH 12/18] Refactor signatures --- crates/plotnik-lib/src/query/link.rs | 19 ++++++++++--------- crates/plotnik-lib/src/query/query.rs | 8 ++++---- .../src/query/type_check/context.rs | 13 +++++++------ .../plotnik-lib/src/query/type_check/infer.rs | 11 ++++++----- .../plotnik-lib/src/query/type_check/mod.rs | 6 +++--- 5 files changed, 30 insertions(+), 27 deletions(-) diff --git a/crates/plotnik-lib/src/query/link.rs b/crates/plotnik-lib/src/query/link.rs index 5a660f01..f259e05a 100644 --- a/crates/plotnik-lib/src/query/link.rs +++ b/crates/plotnik-lib/src/query/link.rs @@ -37,12 +37,12 @@ use super::visitor::{Visitor, walk}; /// This function is decoupled from `Query` to allow easier testing and /// modularity. It orchestrates the resolution and validation phases. 
pub fn link<'q>( - ast_map: &AstMap, - source_map: &'q SourceMap, + interner: &mut Interner, lang: &Lang, + source_map: &'q SourceMap, + ast_map: &AstMap, symbol_table: &SymbolTable, output: &mut LinkOutput, - interner: &mut Interner, diagnostics: &mut Diagnostics, ) { // Local deduplication maps (not exposed in output) @@ -51,14 +51,14 @@ pub fn link<'q>( for (&source_id, root) in ast_map { let mut linker = Linker { - source_map, - source_id, + interner, lang, + source_map, symbol_table, + source_id, node_type_ids: &mut node_type_ids, node_field_ids: &mut node_field_ids, output, - interner, diagnostics, }; linker.link(root); @@ -66,14 +66,15 @@ pub fn link<'q>( } struct Linker<'a, 'q> { - source_map: &'q SourceMap, - source_id: SourceId, + // Refs + interner: &'a mut Interner, lang: &'a Lang, + source_map: &'q SourceMap, symbol_table: &'a SymbolTable, + source_id: SourceId, node_type_ids: &'a mut HashMap<&'q str, Option>, node_field_ids: &'a mut HashMap<&'q str, Option>, output: &'a mut LinkOutput, - interner: &'a mut Interner, diagnostics: &'a mut Diagnostics, } diff --git a/crates/plotnik-lib/src/query/query.rs b/crates/plotnik-lib/src/query/query.rs index 4676a347..5d72e6b1 100644 --- a/crates/plotnik-lib/src/query/query.rs +++ b/crates/plotnik-lib/src/query/query.rs @@ -120,11 +120,11 @@ impl QueryParsed { // Unified type checking pass let type_context = type_check::infer_types( + &mut interner, &self.ast_map, &symbol_table, &dependency_analysis, &mut self.diag, - &mut interner, ); QueryAnalyzed { @@ -204,12 +204,12 @@ impl QueryAnalyzed { let mut output = link::LinkOutput::default(); link::link( - &self.query_parsed.ast_map, - &self.query_parsed.source_map, + &mut self.interner, lang, + &self.query_parsed.source_map, + &self.query_parsed.ast_map, &self.symbol_table, &mut output, - &mut self.interner, &mut self.query_parsed.diag, ); diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 
8d7988d6..bf8a4d50 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -16,20 +16,21 @@ use super::types::{ /// Central registry for types, symbols, and expression metadata. #[derive(Debug, Clone)] pub struct TypeContext { + // Storage /// Interned types by ID types: Vec, /// Deduplication map for type interning type_map: HashMap, - /// Cached term info per expression - term_info: HashMap, - /// Definition-level type info (for TypeScript emission), keyed by DefId - def_types: HashMap, /// DefId → Symbol mapping (for resolving def names) def_names: Vec, /// Symbol → DefId reverse lookup def_ids: HashMap, + /// Definition-level type info (for TypeScript emission), keyed by DefId + def_types: HashMap, /// Definitions that are part of a recursive SCC recursive_defs: HashSet, + /// Cached term info per expression + term_info: HashMap, } impl Default for TypeContext { @@ -43,11 +44,11 @@ impl TypeContext { let mut ctx = Self { types: Vec::new(), type_map: HashMap::new(), - term_info: HashMap::new(), - def_types: HashMap::new(), def_names: Vec::new(), def_ids: HashMap::new(), + def_types: HashMap::new(), recursive_defs: HashSet::new(), + term_info: HashMap::new(), }; // Pre-register builtin types at their expected IDs diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index c242ea11..6743b16d 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -29,11 +29,12 @@ use super::unify::{UnifyError, unify_flows}; /// Inference context for a single pass over the AST. 
pub struct InferenceVisitor<'a, 'd> { + // References / Contexts pub ctx: &'a mut TypeContext, pub interner: &'a mut Interner, pub symbol_table: &'a SymbolTable, - pub diag: &'d mut Diagnostics, pub source_id: SourceId, + pub diag: &'d mut Diagnostics, } impl<'a, 'd> InferenceVisitor<'a, 'd> { @@ -41,15 +42,15 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { ctx: &'a mut TypeContext, interner: &'a mut Interner, symbol_table: &'a SymbolTable, - diag: &'d mut Diagnostics, source_id: SourceId, + diag: &'d mut Diagnostics, ) -> Self { Self { ctx, interner, symbol_table, - diag, source_id, + diag, } } @@ -602,10 +603,10 @@ pub fn infer_root( ctx: &mut TypeContext, interner: &mut Interner, symbol_table: &SymbolTable, - diag: &mut Diagnostics, source_id: SourceId, root: &Root, + diag: &mut Diagnostics, ) { - let mut visitor = InferenceVisitor::new(ctx, interner, symbol_table, diag, source_id); + let mut visitor = InferenceVisitor::new(ctx, interner, symbol_table, source_id, diag); visitor.visit(root); } diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index 1b4d1f6c..1a5ba1ca 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -38,11 +38,11 @@ use infer::infer_root; /// Processes definitions in dependency order (leaves first) to handle /// recursive definitions correctly. 
pub fn infer_types( + interner: &mut Interner, ast_map: &IndexMap, symbol_table: &SymbolTable, dependency_analysis: &DependencyAnalysis, diag: &mut Diagnostics, - interner: &mut Interner, ) -> TypeContext { let mut ctx = TypeContext::new(); @@ -88,7 +88,7 @@ pub fn infer_types( }; // Run inference on this root - infer_root(&mut ctx, interner, symbol_table, diag, source_id, root); + infer_root(&mut ctx, interner, symbol_table, source_id, root, diag); // Register the definition's output type if let Some(body) = symbol_table.get(def_name) @@ -110,7 +110,7 @@ pub fn infer_types( continue; }; - infer_root(&mut ctx, interner, symbol_table, diag, source_id, root); + infer_root(&mut ctx, interner, symbol_table, source_id, root, diag); if let Some(body) = symbol_table.get(name) && let Some(info) = ctx.get_term_info(body).cloned() From 183e9a4e5abf60adbf0b45bbf210a015bc5e1ae7 Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 17:37:00 -0300 Subject: [PATCH 13/18] refactor infer.rs --- .../plotnik-lib/src/query/type_check/infer.rs | 498 +++++++++--------- 1 file changed, 250 insertions(+), 248 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/infer.rs b/crates/plotnik-lib/src/query/type_check/infer.rs index 6743b16d..e1390440 100644 --- a/crates/plotnik-lib/src/query/type_check/infer.rs +++ b/crates/plotnik-lib/src/query/type_check/infer.rs @@ -4,12 +4,17 @@ //! Reports diagnostics for type errors like strict dimensionality violations. 
use std::collections::BTreeMap; +use std::collections::btree_map::Entry; use plotnik_core::Interner; +use rowan::TextRange; +use super::context::TypeContext; use super::symbol::Symbol; - -use rowan::TextRange; +use super::types::{ + Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TermInfo, TypeFlow, TypeId, TypeKind, +}; +use super::unify::{UnifyError, unify_flows}; use crate::diagnostics::{DiagnosticKind, Diagnostics}; use crate::parser::ast::{ @@ -21,15 +26,8 @@ use crate::query::source_map::SourceId; use crate::query::symbol_table::SymbolTable; use crate::query::visitor::{Visitor, walk_alt_expr, walk_def, walk_named_node, walk_seq_expr}; -use super::context::TypeContext; -use super::types::{ - Arity, FieldInfo, QuantifierKind, TYPE_NODE, TYPE_STRING, TermInfo, TypeFlow, TypeId, TypeKind, -}; -use super::unify::{UnifyError, unify_flows}; - /// Inference context for a single pass over the AST. pub struct InferenceVisitor<'a, 'd> { - // References / Contexts pub ctx: &'a mut TypeContext, pub interner: &'a mut Interner, pub symbol_table: &'a SymbolTable, @@ -56,12 +54,11 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { /// Infer the TermInfo for an expression, caching the result. pub fn infer_expr(&mut self, expr: &Expr) -> TermInfo { - // Check cache first if let Some(info) = self.ctx.get_term_info(expr) { return info.clone(); } - // Insert sentinel to break cycles + // Sentinel to break recursion cycles self.ctx.set_term_info(expr.clone(), TermInfo::void()); let info = self.compute_expr(expr); @@ -82,7 +79,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } } - /// Named node: matches one position, bubbles up child captures + /// Named node: matches one position, bubbles up child captures. 
fn infer_named_node(&mut self, node: &NamedNode) -> TermInfo { let mut merged_fields: BTreeMap = BTreeMap::new(); @@ -93,6 +90,7 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { && let Some(fields) = self.ctx.get_struct_fields(type_id) { for (name, info) in fields { + // Named nodes merge fields silently (union behavior) merged_fields.entry(*name).or_insert(*info); } } @@ -107,31 +105,11 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TermInfo::new(Arity::One, flow) } - /// Anonymous node (literal or wildcard): matches one position, produces nothing + /// Anonymous node (literal or wildcard): matches one position, produces nothing. fn infer_anonymous_node(&mut self, _node: &AnonymousNode) -> TermInfo { TermInfo::new(Arity::One, TypeFlow::Void) } - /// If expr is (or contains at its core) a recursive Ref, return its Ref type. - fn get_recursive_ref_type(&mut self, expr: &Expr) -> Option { - match expr { - Expr::Ref(r) => { - let name_tok = r.name()?; - let name_sym = self.interner.intern(name_tok.text()); - let def_id = self.ctx.get_def_id_sym(name_sym)?; - if self.ctx.is_recursive(def_id) { - Some(self.ctx.intern_type(TypeKind::Ref(def_id))) - } else { - None - } - } - Expr::QuantifiedExpr(q) => q.inner().and_then(|i| self.get_recursive_ref_type(&i)), - Expr::CapturedExpr(c) => c.inner().and_then(|i| self.get_recursive_ref_type(&i)), - Expr::FieldExpr(f) => f.value().and_then(|v| self.get_recursive_ref_type(&v)), - _ => None, - } - } - /// Reference: transparent for non-recursive defs, opaque boundary for recursive ones. 
fn infer_ref(&mut self, r: &Ref) -> TermInfo { let Some(name_tok) = r.name() else { @@ -148,21 +126,18 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { return TermInfo::new(Arity::One, TypeFlow::Void); } - // Get the body expression for this definition let Some(body) = self.symbol_table.get(name) else { - // Undefined ref - already reported by symbol_table pass return TermInfo::void(); }; - // Non-recursive refs are transparent - propagate body's flow and arity + // Non-recursive refs are transparent self.infer_expr(body) } - /// Sequence: One if 0-1 children, else Many; merge children's fields + /// Sequence: Arity aggregation and strict field merging (no duplicates). fn infer_seq_expr(&mut self, seq: &SeqExpr) -> TermInfo { let children: Vec<_> = seq.children().collect(); - // Compute arity based on child count let arity = match children.len() { 0 | 1 => children .first() @@ -171,37 +146,18 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { _ => Arity::Many, }; - // Merge fields from all children let mut merged_fields: BTreeMap = BTreeMap::new(); for child in &children { let child_info = self.infer_expr(child); - if let TypeFlow::Bubble(type_id) = child_info.flow - && let Some(fields) = self.ctx.get_struct_fields(type_id) - { - for (&name, &info) in fields { - use std::collections::btree_map::Entry; - match merged_fields.entry(name) { - Entry::Vacant(e) => { - e.insert(info); - } - Entry::Occupied(_) => { - // Duplicate capture in same scope - error - self.diag - .report( - self.source_id, - DiagnosticKind::DuplicateCaptureInScope, - child.text_range(), - ) - .message(self.interner.resolve(name)) - .emit(); - } - } + if let TypeFlow::Bubble(type_id) = child_info.flow { + // Clone fields to release immutable borrow on self.ctx, + // allowing mutable borrow of self for merge_seq_fields. 
+ if let Some(fields) = self.ctx.get_struct_fields(type_id).cloned() { + self.merge_seq_fields(&mut merged_fields, &fields, child.text_range()); } } - // Void and Scalar children don't contribute fields - // (Scalar would be from refs, which are scope boundaries) } let flow = if merged_fields.is_empty() { @@ -213,11 +169,34 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TermInfo::new(arity, flow) } - /// Alternation: arity is Many if ANY branch is Many; type depends on tagged vs untagged - fn infer_alt_expr(&mut self, alt: &AltExpr) -> TermInfo { - let kind = alt.kind(); + fn merge_seq_fields( + &mut self, + target: &mut BTreeMap, + source: &BTreeMap, + range: TextRange, + ) { + for (&name, &info) in source { + match target.entry(name) { + Entry::Vacant(e) => { + e.insert(info); + } + Entry::Occupied(_) => { + self.diag + .report( + self.source_id, + DiagnosticKind::DuplicateCaptureInScope, + range, + ) + .message(self.interner.resolve(name)) + .emit(); + } + } + } + } - match kind { + /// Alternation: arity is Many if ANY branch is Many. 
+ fn infer_alt_expr(&mut self, alt: &AltExpr) -> TermInfo { + match alt.kind() { AltKind::Tagged => self.infer_tagged_alt(alt), AltKind::Untagged | AltKind::Mixed => self.infer_untagged_alt(alt), } @@ -234,23 +213,17 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let label_sym = self.interner.intern(label.text()); let Some(body) = branch.body() else { - // Empty variant gets void/empty struct type - variants.insert( - label_sym, - self.ctx.intern_type(TypeKind::Struct(BTreeMap::new())), - ); + // Empty variant -> empty struct + let empty_struct = self.ctx.intern_struct(BTreeMap::new()); + variants.insert(label_sym, empty_struct); continue; }; let body_info = self.infer_expr(&body); combined_arity = combined_arity.combine(body_info.arity); - - // Convert flow to a type for this variant - let variant_type = self.flow_to_type(&body_info.flow); - variants.insert(label_sym, variant_type); + variants.insert(label_sym, self.flow_to_type(&body_info.flow)); } - // Tagged alternation produces an Enum type let enum_type = self.ctx.intern_type(TypeKind::Enum(variants)); TermInfo::new(combined_arity, TypeFlow::Scalar(enum_type)) } @@ -259,22 +232,22 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let mut flows: Vec = Vec::new(); let mut combined_arity = Arity::One; - // Handle both direct exprs and branches without labels + // Collect from branches for branch in alt.branches() { if let Some(body) = branch.body() { - let body_info = self.infer_expr(&body); - combined_arity = combined_arity.combine(body_info.arity); - flows.push(body_info.flow); + let info = self.infer_expr(&body); + combined_arity = combined_arity.combine(info.arity); + flows.push(info.flow); } } + // Collect from direct expressions for expr in alt.exprs() { - let expr_info = self.infer_expr(&expr); - combined_arity = combined_arity.combine(expr_info.arity); - flows.push(expr_info.flow); + let info = self.infer_expr(&expr); + combined_arity = combined_arity.combine(info.arity); + flows.push(info.flow); } - // Unify all 
flows let unified_flow = match unify_flows(self.ctx, flows) { Ok(flow) => flow, Err(err) => { @@ -286,96 +259,122 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { TermInfo::new(combined_arity, unified_flow) } - /// Captured expression: wraps inner's flow into a field + /// Captured expression: wraps inner's flow into a field. fn infer_captured_expr(&mut self, cap: &CapturedExpr) -> TermInfo { let Some(name_tok) = cap.name() else { - // Missing name - recover gracefully + // Recover gracefully return cap .inner() - .map(|inner| self.infer_expr(&inner)) + .map(|i| self.infer_expr(&i)) .unwrap_or_else(TermInfo::void); }; let capture_name = self.interner.intern(name_tok.text()); - // Check for type annotation - let annotation_type = cap.type_annotation().and_then(|t| { - t.name().map(|n| { - let type_name = n.text(); - if type_name == "string" { - TYPE_STRING - } else { - let type_sym = self.interner.intern(type_name); - self.ctx.intern_type(TypeKind::Custom(type_sym)) - } - }) - }); - + let annotation_type = self.resolve_annotation(cap); let Some(inner) = cap.inner() else { - // Capture without inner - still produces a field + // Capture without inner -> creates a Node field let type_id = annotation_type.unwrap_or(TYPE_NODE); + let field = FieldInfo::required(type_id); return TermInfo::new( Arity::One, - TypeFlow::Bubble( - self.ctx - .intern_single_field(capture_name, FieldInfo::required(type_id)), - ), + TypeFlow::Bubble(self.ctx.intern_single_field(capture_name, field)), ); }; - // Special handling for quantifiers: - // - * or +: this capture serves as row capture, skip strict dimensionality - // - ?: capture produces an optional field - let (inner_info, is_optional_capture) = if let Expr::QuantifiedExpr(q) = &inner { + // Determine how inner flow relates to capture (e.g., ? 
makes field optional) + let (inner_info, is_optional) = self.resolve_capture_inner(&inner); + + let captured_type = self.determine_captured_type(&inner, &inner_info, annotation_type); + let field_info = if is_optional { + FieldInfo::optional(captured_type) + } else { + FieldInfo::required(captured_type) + }; + + TermInfo::new( + inner_info.arity, + TypeFlow::Bubble(self.ctx.intern_single_field(capture_name, field_info)), + ) + } + + /// Resolves explicit type annotation like `@foo :: string`. + fn resolve_annotation(&mut self, cap: &CapturedExpr) -> Option { + cap.type_annotation().and_then(|t| { + t.name().map(|n| { + let text = n.text(); + if text == "string" { + TYPE_STRING + } else { + let sym = self.interner.intern(text); + self.ctx.intern_type(TypeKind::Custom(sym)) + } + }) + }) + } + + /// Logic for how quantifier on the inner expression affects the capture field. + /// Returns (Info, is_optional). + fn resolve_capture_inner(&mut self, inner: &Expr) -> (TermInfo, bool) { + if let Expr::QuantifiedExpr(q) = inner { let quantifier = self.parse_quantifier(q); match quantifier { + // * or + acts as row capture here (skipping strict dimensionality) QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => { (self.infer_quantified_expr_as_row(q), false) } - QuantifierKind::Optional => (self.infer_expr(&inner), true), + // ? makes the resulting capture field optional + QuantifierKind::Optional => (self.infer_expr(inner), true), } } else { - (self.infer_expr(&inner), false) - }; + (self.infer_expr(inner), false) + } + } - // Transform based on inner's flow - let captured_type = match &inner_info.flow { + /// Transforms the inner flow into a specific TypeId for the field.
+ fn determine_captured_type( + &mut self, + inner: &Expr, + inner_info: &TermInfo, + annotation: Option, + ) -> TypeId { + match &inner_info.flow { TypeFlow::Void => { - // Check if inner is a recursive ref - if so, capture produces Ref type - if let Some(ref_type) = self.get_recursive_ref_type(&inner) { - annotation_type.unwrap_or(ref_type) + if let Some(ref_type) = self.get_recursive_ref_type(inner) { + annotation.unwrap_or(ref_type) } else { - // @name on Void → capture produces Node (or annotated type) - annotation_type.unwrap_or(TYPE_NODE) + annotation.unwrap_or(TYPE_NODE) } } - TypeFlow::Scalar(type_id) => { - // @name on Scalar → capture that scalar type - annotation_type.unwrap_or(*type_id) - } - TypeFlow::Bubble(type_id) => { - // @name on Bubble → capture the struct type directly - annotation_type.unwrap_or(*type_id) - } - }; - - let field_info = if is_optional_capture { - FieldInfo::optional(captured_type) - } else { - FieldInfo::required(captured_type) - }; + TypeFlow::Scalar(type_id) => annotation.unwrap_or(*type_id), + TypeFlow::Bubble(type_id) => annotation.unwrap_or(*type_id), + } + } - TermInfo::new( - inner_info.arity, - TypeFlow::Bubble(self.ctx.intern_single_field(capture_name, field_info)), - ) + /// If expr is (or contains) a recursive Ref, return its Ref type. 
+ fn get_recursive_ref_type(&mut self, expr: &Expr) -> Option { + match expr { + Expr::Ref(r) => { + let name_tok = r.name()?; + let name = name_tok.text(); + let sym = self.interner.intern(name); + let def_id = self.ctx.get_def_id_sym(sym)?; + if self.ctx.is_recursive(def_id) { + Some(self.ctx.intern_type(TypeKind::Ref(def_id))) + } else { + None + } + } + Expr::QuantifiedExpr(q) => self.get_recursive_ref_type(&q.inner()?), + Expr::CapturedExpr(c) => self.get_recursive_ref_type(&c.inner()?), + Expr::FieldExpr(f) => self.get_recursive_ref_type(&f.value()?), + _ => None, + } } - /// Quantified expression: applies quantifier to inner's flow fn infer_quantified_expr(&mut self, quant: &QuantifiedExpr) -> TermInfo { self.infer_quantified_expr_impl(quant, false) } - /// Quantified expression when used as a row capture (skip strict dimensionality check) fn infer_quantified_expr_as_row(&mut self, quant: &QuantifiedExpr) -> TermInfo { self.infer_quantified_expr_impl(quant, true) } @@ -392,72 +391,66 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let inner_info = self.infer_expr(&inner); let quantifier = self.parse_quantifier(quant); - match quantifier { - QuantifierKind::Optional => { - // `?` makes fields optional, doesn't add dimensionality - let flow = match inner_info.flow { - TypeFlow::Void => TypeFlow::Void, - TypeFlow::Scalar(t) => { - TypeFlow::Scalar(self.ctx.intern_type(TypeKind::Optional(t))) - } - TypeFlow::Bubble(type_id) => { - // Make all fields optional - let fields = self - .ctx - .get_struct_fields(type_id) - .cloned() - .unwrap_or_default(); - let optional_fields = fields - .into_iter() - .map(|(k, v)| (k, v.make_optional())) - .collect(); - TypeFlow::Bubble(self.ctx.intern_struct(optional_fields)) - } - }; - TermInfo::new(inner_info.arity, flow) - } - + let flow = match quantifier { + QuantifierKind::Optional => self.make_flow_optional(inner_info.flow), QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore => { - // * and + require strict dimensionality 
(unless this is a row capture) if !is_row_capture { self.check_strict_dimensionality(quant, &inner_info); } + self.make_flow_array(inner_info.flow, &inner, quantifier.is_non_empty()) + } + }; + + TermInfo::new(inner_info.arity, flow) + } + + fn make_flow_optional(&mut self, flow: TypeFlow) -> TypeFlow { + match flow { + TypeFlow::Void => TypeFlow::Void, + TypeFlow::Scalar(t) => TypeFlow::Scalar(self.ctx.intern_type(TypeKind::Optional(t))), + TypeFlow::Bubble(type_id) => { + let fields = self + .ctx + .get_struct_fields(type_id) + .cloned() + .unwrap_or_default(); + let optional_fields = fields + .into_iter() + .map(|(k, v)| (k, v.make_optional())) + .collect(); + TypeFlow::Bubble(self.ctx.intern_struct(optional_fields)) + } + } + } - let flow = match inner_info.flow { - TypeFlow::Void => { - // Scalar list: void inner becomes array of Node (or Ref for recursive) - let element = self.get_recursive_ref_type(&inner).unwrap_or(TYPE_NODE); - let array_type = self.ctx.intern_type(TypeKind::Array { - element, - non_empty: quantifier.is_non_empty(), - }); - TypeFlow::Scalar(array_type) - } - TypeFlow::Scalar(t) => { - // Scalar becomes array - let array_type = self.ctx.intern_type(TypeKind::Array { - element: t, - non_empty: quantifier.is_non_empty(), - }); - TypeFlow::Scalar(array_type) - } - TypeFlow::Bubble(struct_type) => { - // Fields with * or + and no row capture is an error - // (already reported by check_strict_dimensionality if !is_row_capture) - // Return array of struct as best-effort - let array_type = self.ctx.intern_type(TypeKind::Array { - element: struct_type, - non_empty: quantifier.is_non_empty(), - }); - TypeFlow::Scalar(array_type) - } - }; - TermInfo::new(inner_info.arity, flow) + fn make_flow_array(&mut self, flow: TypeFlow, inner: &Expr, non_empty: bool) -> TypeFlow { + match flow { + TypeFlow::Void => { + // Scalar list: void inner -> array of Node (or Ref) + let element = self.get_recursive_ref_type(inner).unwrap_or(TYPE_NODE); + let array_type = 
self.ctx.intern_type(TypeKind::Array { element, non_empty }); + TypeFlow::Scalar(array_type) + } + TypeFlow::Scalar(t) => { + let array_type = self.ctx.intern_type(TypeKind::Array { + element: t, + non_empty, + }); + TypeFlow::Scalar(array_type) + } + TypeFlow::Bubble(struct_type) => { + // Note: Bubble with * or + is strictly invalid unless it's a row capture, + // but we construct a valid type as fallback. + let array_type = self.ctx.intern_type(TypeKind::Array { + element: struct_type, + non_empty, + }); + TypeFlow::Scalar(array_type) } } } - /// Field expression: arity One, delegates type to value + /// Field expression: arity One, delegates type to value. fn infer_field_expr(&mut self, field: &FieldExpr) -> TermInfo { let Some(value) = field.value() else { return TermInfo::void(); @@ -465,66 +458,76 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { let value_info = self.infer_expr(&value); - // Field validation: value must have arity One + // Validation: Fields cannot be assigned 'Many' arity values directly if value_info.arity == Arity::Many { - let field_name = field - .name() - .map(|t| t.text().to_string()) - .unwrap_or_else(|| "field".to_string()); + self.report_field_arity_error(field, &value); + } - let mut builder = self.diag.report( - self.source_id, - DiagnosticKind::FieldSequenceValue, - value.text_range(), - ); - builder = builder.message(field_name); + TermInfo::new(Arity::One, value_info.flow) + } - // If value is a reference, add related info - if let Expr::Ref(r) = &value - && let Some(name_tok) = r.name() - && let Some((def_source, def_body)) = self.symbol_table.get_full(name_tok.text()) - { - builder = builder.related_to(def_source, def_body.text_range(), "defined here"); + fn report_field_arity_error(&mut self, field: &FieldExpr, value: &Expr) { + let field_name = field + .name() + .map(|t| t.text().to_string()) + .unwrap_or_else(|| "field".to_string()); + + let mut builder = self.diag.report( + self.source_id, + DiagnosticKind::FieldSequenceValue, 
+ value.text_range(), + ); + builder = builder.message(field_name); + + if let Expr::Ref(r) = value + && let Some(name_tok) = r.name() + { + let name = name_tok.text(); + if let Some((src, body)) = self.symbol_table.get_full(name) { + builder = builder.related_to(src, body.text_range(), "defined here"); } - - builder.emit(); } - // Field itself has arity One; flow passes through - TermInfo::new(Arity::One, value_info.flow) + builder.emit(); } /// Check strict dimensionality rule for * and + quantifiers. + /// Captures inside a quantifier are forbidden unless marked as a row capture. fn check_strict_dimensionality(&mut self, quant: &QuantifiedExpr, inner_info: &TermInfo) { - // If inner has fields (captures), that's a violation - if let TypeFlow::Bubble(type_id) = &inner_info.flow - && let Some(fields) = self.ctx.get_struct_fields(*type_id) - && !fields.is_empty() - { - let op = quant - .operator() - .map(|t| t.text().to_string()) - .unwrap_or_else(|| "*".to_string()); - - let capture_names: Vec<_> = fields - .keys() - .map(|s| format!("`@{}`", self.interner.resolve(*s))) - .collect(); - let captures_str = capture_names.join(", "); - - self.diag - .report( - self.source_id, - DiagnosticKind::StrictDimensionalityViolation, - quant.text_range(), - ) - .message(format!( - "quantifier `{}` contains captures ({}) but no row capture", - op, captures_str - )) - .hint("wrap as `{...}* @rows`") - .emit(); + let TypeFlow::Bubble(type_id) = &inner_info.flow else { + return; + }; + + let Some(fields) = self.ctx.get_struct_fields(*type_id) else { + return; + }; + if fields.is_empty() { + return; } + + let op = quant + .operator() + .map(|t| t.text().to_string()) + .unwrap_or_else(|| "*".to_string()); + + let capture_names: Vec<_> = fields + .keys() + .map(|s| format!("`@{}`", self.interner.resolve(*s))) + .collect(); + let captures_str = capture_names.join(", "); + + self.diag + .report( + self.source_id, + DiagnosticKind::StrictDimensionalityViolation, + quant.text_range(), + ) 
+ .message(format!( + "quantifier `{}` contains captures ({}) but no row capture", + op, captures_str + )) + .hint("wrap as `{...}* @rows`") + .emit(); } fn parse_quantifier(&self, quant: &QuantifiedExpr) -> QuantifierKind { @@ -540,7 +543,6 @@ impl<'a, 'd> InferenceVisitor<'a, 'd> { } } - /// Convert a TypeFlow to a TypeId for storage in enum variants, etc. fn flow_to_type(&mut self, flow: &TypeFlow) -> TypeId { match flow { TypeFlow::Void => self.ctx.intern_struct(BTreeMap::new()), @@ -585,7 +587,7 @@ impl Visitor for InferenceVisitor<'_, '_> { } fn visit_named_node(&mut self, node: &NamedNode) { - // Visit children first (bottom-up) + // Bottom-up traversal walk_named_node(self, node); } From 9afda974276ecccdf4512815b72f8f27de2189dd Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 17:40:12 -0300 Subject: [PATCH 14/18] refactor context.rs --- .../src/query/type_check/context.rs | 40 ++++++------------- 1 file changed, 13 insertions(+), 27 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index bf8a4d50..4219f150 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -16,20 +16,16 @@ use super::types::{ /// Central registry for types, symbols, and expression metadata. #[derive(Debug, Clone)] pub struct TypeContext { - // Storage - /// Interned types by ID types: Vec, - /// Deduplication map for type interning type_map: HashMap, - /// DefId → Symbol mapping (for resolving def names) + def_names: Vec, - /// Symbol → DefId reverse lookup def_ids: HashMap, /// Definition-level type info (for TypeScript emission), keyed by DefId def_types: HashMap, /// Definitions that are part of a recursive SCC recursive_defs: HashSet, - /// Cached term info per expression + term_info: HashMap, } @@ -101,9 +97,7 @@ impl TypeContext { /// Intern a struct type with a single field. 
pub fn intern_single_field(&mut self, name: Symbol, info: FieldInfo) -> TypeId { - let mut fields = BTreeMap::new(); - fields.insert(name, info); - self.intern_type(TypeKind::Struct(fields)) + self.intern_type(TypeKind::Struct(BTreeMap::from([(name, info)]))) } /// Get struct fields from a TypeId, if it points to a Struct. @@ -125,16 +119,9 @@ impl TypeContext { } /// Register a definition by name, returning its DefId. - /// If already registered, returns existing DefId. pub fn register_def(&mut self, interner: &mut Interner, name: &str) -> DefId { let sym = interner.intern(name); - if let Some(&def_id) = self.def_ids.get(&sym) { - return def_id; - } - let def_id = DefId::from_raw(self.def_names.len() as u32); - self.def_names.push(sym); - self.def_ids.insert(sym, def_id); - def_id + self.register_def_sym(sym) } /// Register a definition by pre-interned Symbol, returning its DefId. @@ -142,6 +129,7 @@ impl TypeContext { if let Some(&def_id) = self.def_ids.get(&sym) { return def_id; } + let def_id = DefId::from_raw(self.def_names.len() as u32); self.def_names.push(sym); self.def_ids.insert(sym, def_id); @@ -155,13 +143,11 @@ impl TypeContext { /// Get DefId for a definition name (requires interner for lookup). pub fn get_def_id(&self, interner: &Interner, name: &str) -> Option { - // Linear scan - only used during analysis, not hot path - for (&sym, &def_id) in &self.def_ids { - if interner.resolve(sym) == name { - return Some(def_id); - } - } - None + // Linear scan - only used during analysis, not hot path. + // Necessary because we don't assume Interner has reverse lookup here. + self.def_ids + .iter() + .find_map(|(&sym, &id)| (interner.resolve(sym) == name).then_some(id)) } /// Get the name Symbol for a DefId. @@ -193,7 +179,7 @@ impl TypeContext { /// Registers the def if not already known. 
pub fn set_def_type_by_name(&mut self, interner: &mut Interner, name: &str, type_id: TypeId) { let def_id = self.register_def(interner, name); - self.def_types.insert(def_id, type_id); + self.set_def_type(def_id, type_id); } /// Get the output type for a definition by DefId. @@ -203,8 +189,8 @@ impl TypeContext { /// Get the output type for a definition by string name. pub fn get_def_type_by_name(&self, interner: &Interner, name: &str) -> Option { - self.get_def_id(interner, name) - .and_then(|id| self.def_types.get(&id).copied()) + let id = self.get_def_id(interner, name)?; + self.get_def_type(id) } /// Get arity for an expression. From 1b00e58dd162c6ea51bb77735553e8ea51075a2c Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 18:09:00 -0300 Subject: [PATCH 15/18] Refactor other --- .../src/query/type_check/emit_ts.rs | 605 ++++++++---------- .../plotnik-lib/src/query/type_check/mod.rs | 190 +++--- .../src/query/type_check/symbol.rs | 11 +- .../plotnik-lib/src/query/type_check/types.rs | 169 +++-- .../plotnik-lib/src/query/type_check/unify.rs | 82 ++- 5 files changed, 487 insertions(+), 570 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index 55f128c2..3c9fd83b 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -47,6 +47,7 @@ pub struct TsEmitter<'a> { ctx: &'a TypeContext, interner: &'a Interner, config: EmitConfig, + /// Generated type names, to avoid collisions used_names: BTreeSet, /// TypeId -> generated name mapping @@ -76,16 +77,7 @@ impl<'a> TsEmitter<'a> { /// Emit TypeScript for all definition types. 
pub fn emit(mut self) -> String { - // First pass: collect all types that need names with context - self.collect_type_names_with_context(); - - // Second pass: collect referenced builtins and custom types - self.collect_references(); - - // Emit Node type if configured and actually used - if self.config.emit_node_type && self.referenced_builtins.contains(&TYPE_NODE) { - self.emit_node_type(); - } + self.prepare_emission(); // Collect definition names for lookup let def_names: HashMap = self @@ -99,17 +91,18 @@ impl<'a> TsEmitter<'a> { }) .collect(); - // Compute topological order (leaves first) - let sorted = self.topological_sort(); + // Collect all reachable types starting from definitions + let mut to_emit = HashSet::new(); + for (_, type_id) in self.ctx.iter_def_types() { + self.collect_reachable_types(type_id, &mut to_emit); + } - // Emit types in topological order - for type_id in sorted { + // Emit in topological order + for type_id in self.sort_topologically(to_emit) { if let Some(def_name) = def_names.get(&type_id) { - self.emit_definition(def_name, type_id); - } else if let Some(name) = self.type_names.get(&type_id).cloned() { - self.emit_nested_type(type_id, &name); - } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(type_id) { - self.emit_custom_type_alias(self.interner.resolve(*sym)); + self.emit_type_definition(def_name, type_id); + } else { + self.emit_generated_or_custom(type_id); } } @@ -118,53 +111,77 @@ impl<'a> TsEmitter<'a> { /// Emit TypeScript for a single definition. 
pub fn emit_single(mut self, name: &str, type_id: TypeId) -> String { - self.collect_type_names_with_context(); - self.collect_references(); + self.prepare_emission(); - if self.config.emit_node_type && self.referenced_builtins.contains(&TYPE_NODE) { - self.emit_node_type(); - } + let mut to_emit = HashSet::new(); + self.collect_reachable_types(type_id, &mut to_emit); - // Compute topological order for types reachable from this definition - let sorted = self.topological_sort_from(type_id); + let sorted = self.sort_topologically(to_emit); - // Emit nested types first (in dependency order) - for nested_id in &sorted { - if *nested_id != type_id { - if let Some(nested_name) = self.type_names.get(nested_id).cloned() { - self.emit_nested_type(*nested_id, &nested_name); - } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(*nested_id) { - self.emit_custom_type_alias(self.interner.resolve(*sym)); - } + // Emit dependencies (everything except the root) + for &dep_id in &sorted { + if dep_id != type_id { + self.emit_generated_or_custom(dep_id); } } - // Emit the main definition last - self.emit_definition(name, type_id); + // Emit the main definition + self.emit_type_definition(name, type_id); self.output } - /// Compute topological sort of all types (leaves first). - fn topological_sort(&self) -> Vec { - // Collect all types that need emission - let mut to_emit: HashSet = HashSet::new(); - for (_, type_id) in self.ctx.iter_def_types() { - self.collect_emittable_types(type_id, &mut to_emit); - } + fn prepare_emission(&mut self) { + self.assign_generated_names(); + self.collect_builtin_references(); - self.topo_sort_set(&to_emit) + if self.config.emit_node_type && self.referenced_builtins.contains(&TYPE_NODE) { + self.emit_node_interface(); + } } - /// Compute topological sort starting from a single type. 
- fn topological_sort_from(&self, root: TypeId) -> Vec { - let mut to_emit: HashSet = HashSet::new(); - self.collect_emittable_types(root, &mut to_emit); - self.topo_sort_set(&to_emit) + fn assign_generated_names(&mut self) { + // 1. Reserve definition names to avoid collisions + for (def_id, _) in self.ctx.iter_def_types() { + let name = self.ctx.def_name(self.interner, def_id); + self.used_names.insert(to_pascal_case(name)); + } + + // 2. Collect naming contexts (path from definition to type) + let mut contexts = HashMap::new(); + for (def_id, type_id) in self.ctx.iter_def_types() { + let def_name = self.ctx.def_name(self.interner, def_id); + self.collect_naming_contexts( + type_id, + &NamingContext { + def_name: def_name.to_string(), + field_name: None, + }, + &mut contexts, + ); + } + + // 3. Assign names to types that need them + for (id, kind) in self.ctx.iter_types() { + if !self.needs_generated_name(kind) || self.type_names.contains_key(&id) { + continue; + } + + let name = if let Some(ctx) = contexts.get(&id) { + self.generate_contextual_name(ctx) + } else { + self.generate_fallback_name(kind) + }; + self.type_names.insert(id, name); + } } - /// Collect all types reachable from `type_id` that need emission. 
- fn collect_emittable_types(&self, type_id: TypeId, out: &mut HashSet) { - if type_id.is_builtin() || out.contains(&type_id) { + fn collect_naming_contexts( + &self, + type_id: TypeId, + ctx: &NamingContext, + contexts: &mut HashMap, + ) { + if type_id.is_builtin() || contexts.contains_key(&type_id) { return; } @@ -174,42 +191,83 @@ impl<'a> TsEmitter<'a> { match kind { TypeKind::Struct(fields) => { - out.insert(type_id); - for info in fields.values() { - self.collect_emittable_types(info.type_id, out); + contexts.entry(type_id).or_insert_with(|| ctx.clone()); + for (&field_sym, info) in fields { + let field_name = self.interner.resolve(field_sym); + let field_ctx = NamingContext { + def_name: ctx.def_name.clone(), + field_name: Some(field_name.to_string()), + }; + self.collect_naming_contexts(info.type_id, &field_ctx, contexts); } } TypeKind::Enum(_) => { - // Variant payload structs are inlined in $data by emit_tagged_union, - // so don't collect them for separate emission - out.insert(type_id); + contexts.entry(type_id).or_insert_with(|| ctx.clone()); } TypeKind::Array { element, .. 
} => { - self.collect_emittable_types(*element, out); + self.collect_naming_contexts(*element, ctx, contexts); } TypeKind::Optional(inner) => { - self.collect_emittable_types(*inner, out); + self.collect_naming_contexts(*inner, ctx, contexts); } - TypeKind::Custom(_) => { - out.insert(type_id); + _ => {} + } + } + + fn collect_builtin_references(&mut self) { + for (_, type_id) in self.ctx.iter_def_types() { + self.collect_refs_recursive(type_id); + } + } + + fn collect_refs_recursive(&mut self, type_id: TypeId) { + if type_id == TYPE_NODE || type_id == TYPE_STRING { + self.referenced_builtins.insert(type_id); + return; + } + if type_id == TYPE_VOID { + return; + } + + let Some(kind) = self.ctx.get_type(type_id) else { + return; + }; + + match kind { + TypeKind::Node | TypeKind::Custom(_) => { + self.referenced_builtins.insert(TYPE_NODE); + } + TypeKind::String => { + self.referenced_builtins.insert(TYPE_STRING); + } + TypeKind::Struct(fields) => { + fields + .values() + .for_each(|info| self.collect_refs_recursive(info.type_id)); } + TypeKind::Enum(variants) => { + variants + .values() + .for_each(|&tid| self.collect_refs_recursive(tid)); + } + TypeKind::Array { element, .. } => self.collect_refs_recursive(*element), + TypeKind::Optional(inner) => self.collect_refs_recursive(*inner), _ => {} } } - /// Topologically sort a set of types (leaves first via Kahn's algorithm). 
- fn topo_sort_set(&self, types: &HashSet) -> Vec { - // Build adjacency: type -> types it depends on (within the set) + fn sort_topologically(&self, types: HashSet) -> Vec { let mut deps: HashMap> = HashMap::new(); let mut rdeps: HashMap> = HashMap::new(); - for &tid in types { + for &tid in &types { deps.entry(tid).or_default(); rdeps.entry(tid).or_default(); } - for &tid in types { - for dep in self.direct_deps(tid) { + // Build dependency graph + for &tid in &types { + for dep in self.get_direct_deps(tid) { if types.contains(&dep) && dep != tid { deps.entry(tid).or_default().insert(dep); rdeps.entry(dep).or_default().insert(tid); @@ -217,7 +275,7 @@ impl<'a> TsEmitter<'a> { } } - // Kahn's algorithm: start with nodes that have no dependencies + // Kahn's algorithm let mut result = Vec::with_capacity(types.len()); let mut queue: Vec = deps .iter() @@ -246,30 +304,51 @@ impl<'a> TsEmitter<'a> { result } - /// Get direct type dependencies of a type (non-recursive). - /// Unwraps Array/Optional to find actual emittable dependencies. - fn direct_deps(&self, type_id: TypeId) -> Vec { + fn collect_reachable_types(&self, type_id: TypeId, out: &mut HashSet) { + if type_id.is_builtin() || out.contains(&type_id) { + return; + } + let Some(kind) = self.ctx.get_type(type_id) else { - return vec![]; + return; }; + match kind { + TypeKind::Struct(fields) => { + out.insert(type_id); + for info in fields.values() { + self.collect_reachable_types(info.type_id, out); + } + } + TypeKind::Enum(_) | TypeKind::Custom(_) => { + out.insert(type_id); + } + TypeKind::Array { element, .. 
} => self.collect_reachable_types(*element, out), + TypeKind::Optional(inner) => self.collect_reachable_types(*inner, out), + _ => {} + } + } + + fn get_direct_deps(&self, type_id: TypeId) -> Vec { + let Some(kind) = self.ctx.get_type(type_id) else { + return vec![]; + }; match kind { TypeKind::Struct(fields) => fields .values() - .flat_map(|info| self.unwrap_to_emittable(info.type_id)) + .flat_map(|info| self.unwrap_for_deps(info.type_id)) .collect(), TypeKind::Enum(variants) => variants .values() - .flat_map(|&tid| self.unwrap_to_emittable(tid)) + .flat_map(|&tid| self.unwrap_for_deps(tid)) .collect(), - TypeKind::Array { element, .. } => self.unwrap_to_emittable(*element), - TypeKind::Optional(inner) => self.unwrap_to_emittable(*inner), + TypeKind::Array { element, .. } => self.unwrap_for_deps(*element), + TypeKind::Optional(inner) => self.unwrap_for_deps(*inner), _ => vec![], } } - /// Unwrap Array/Optional wrappers to find the underlying emittable type. - fn unwrap_to_emittable(&self, type_id: TypeId) -> Vec { + fn unwrap_for_deps(&self, type_id: TypeId) -> Vec { if type_id.is_builtin() { return vec![]; } @@ -277,205 +356,42 @@ impl<'a> TsEmitter<'a> { return vec![]; }; match kind { - TypeKind::Array { element, .. } => self.unwrap_to_emittable(*element), - TypeKind::Optional(inner) => self.unwrap_to_emittable(*inner), + TypeKind::Array { element, .. 
} => self.unwrap_for_deps(*element), + TypeKind::Optional(inner) => self.unwrap_for_deps(*inner), TypeKind::Struct(_) | TypeKind::Enum(_) | TypeKind::Custom(_) => vec![type_id], _ => vec![], } } - fn collect_type_names_with_context(&mut self) { - // Reserve definition names first - for (def_id, _) in self.ctx.iter_def_types() { - let name = self.ctx.def_name(self.interner, def_id); - let pascal_name = to_pascal_case(name); - self.used_names.insert(pascal_name); - } - - // Collect naming contexts by traversing definition types - let mut type_contexts: HashMap = HashMap::new(); - - for (def_id, type_id) in self.ctx.iter_def_types() { - let def_name = self.ctx.def_name(self.interner, def_id); - self.collect_contexts_for_type( - type_id, - &NamingContext { - def_name: def_name.to_string(), - field_name: None, - }, - &mut type_contexts, - ); - } - - // Assign names using contexts - for (id, kind) in self.ctx.iter_types() { - if self.needs_named_type(kind) && !self.type_names.contains_key(&id) { - let name = if let Some(ctx) = type_contexts.get(&id) { - self.generate_contextual_name(ctx) - } else { - self.generate_type_name(kind) - }; - self.type_names.insert(id, name); - } - } - } - - fn collect_contexts_for_type( - &self, - type_id: TypeId, - ctx: &NamingContext, - contexts: &mut HashMap, - ) { - if type_id.is_builtin() || contexts.contains_key(&type_id) { - return; - } - - let Some(kind) = self.ctx.get_type(type_id) else { + fn emit_generated_or_custom(&mut self, type_id: TypeId) { + if self.emitted.contains(&type_id) || type_id.is_builtin() { return; - }; - - match kind { - TypeKind::Struct(fields) => { - // Only set context if this type needs a name - contexts.entry(type_id).or_insert_with(|| ctx.clone()); - // Recurse into fields - for (&field_sym, info) in fields { - let field_name = self.interner.resolve(field_sym); - let field_ctx = NamingContext { - def_name: ctx.def_name.clone(), - field_name: Some(field_name.to_string()), - }; - 
self.collect_contexts_for_type(info.type_id, &field_ctx, contexts); - } - } - TypeKind::Enum(variants) => { - contexts.entry(type_id).or_insert_with(|| ctx.clone()); - // Don't recurse into variant types - they're inlined as $data - let _ = variants; - } - TypeKind::Array { element, .. } => { - self.collect_contexts_for_type(*element, ctx, contexts); - } - TypeKind::Optional(inner) => { - self.collect_contexts_for_type(*inner, ctx, contexts); - } - _ => {} } - } - fn generate_contextual_name(&mut self, ctx: &NamingContext) -> String { - let base = if let Some(field) = &ctx.field_name { - format!("{}{}", to_pascal_case(&ctx.def_name), to_pascal_case(field)) - } else { - to_pascal_case(&ctx.def_name) - }; - - self.unique_name(&base) - } - - fn collect_references(&mut self) { - for (_, type_id) in self.ctx.iter_def_types() { - self.collect_refs_in_type(type_id); + if let Some(name) = self.type_names.get(&type_id).cloned() { + self.emit_generated_type_def(type_id, &name); + } else if let Some(TypeKind::Custom(sym)) = self.ctx.get_type(type_id) { + self.emit_custom_type_alias(self.interner.resolve(*sym)); + self.emitted.insert(type_id); } } - fn collect_refs_in_type(&mut self, type_id: TypeId) { - if type_id == TYPE_NODE { - self.referenced_builtins.insert(TYPE_NODE); - return; - } - if type_id == TYPE_STRING { - self.referenced_builtins.insert(TYPE_STRING); - return; - } - if type_id == TYPE_VOID { - return; - } - + fn emit_generated_type_def(&mut self, type_id: TypeId, name: &str) { + self.emitted.insert(type_id); + let export = if self.config.export { "export " } else { "" }; let Some(kind) = self.ctx.get_type(type_id) else { return; }; match kind { - TypeKind::Node => { - self.referenced_builtins.insert(TYPE_NODE); - } - TypeKind::String => { - self.referenced_builtins.insert(TYPE_STRING); - } - TypeKind::Custom(_) => { - // Custom types alias to Node - self.referenced_builtins.insert(TYPE_NODE); - } - TypeKind::Struct(fields) => { - for info in fields.values() { - 
self.collect_refs_in_type(info.type_id); - } - } - TypeKind::Enum(variants) => { - for &vtype in variants.values() { - self.collect_refs_in_type(vtype); - } - } - TypeKind::Array { element, .. } => { - self.collect_refs_in_type(*element); - } - TypeKind::Optional(inner) => { - self.collect_refs_in_type(*inner); - } + TypeKind::Struct(fields) => self.emit_interface(name, fields, export), + TypeKind::Enum(variants) => self.emit_tagged_union(name, variants, export), _ => {} } } - fn needs_named_type(&self, kind: &TypeKind) -> bool { - matches!(kind, TypeKind::Struct(_) | TypeKind::Enum(_)) - } - - fn generate_type_name(&mut self, kind: &TypeKind) -> String { - let base = match kind { - TypeKind::Struct(_) => "Struct", - TypeKind::Enum(_) => "Enum", - _ => "Type", - }; - - self.unique_name(base) - } - - fn unique_name(&mut self, base: &str) -> String { - let base = to_pascal_case(base); - if !self.used_names.contains(&base) { - self.used_names.insert(base.clone()); - return base; - } - - let mut counter = 2; - loop { - let name = format!("{}{}", base, counter); - if !self.used_names.contains(&name) { - self.used_names.insert(name.clone()); - return name; - } - counter += 1; - } - } - - fn emit_node_type(&mut self) { - let export = if self.config.export { "export " } else { "" }; - - if self.config.verbose_nodes { - self.output.push_str(&format!( - "{}interface Node {{\n kind: string;\n text: string;\n startPosition: {{ row: number; column: number }};\n endPosition: {{ row: number; column: number }};\n}}\n\n", - export - )); - } else { - self.output.push_str(&format!( - "{}interface Node {{\n kind: string;\n text: string;\n}}\n\n", - export - )); - } - } - - fn emit_definition(&mut self, name: &str, type_id: TypeId) { + fn emit_type_definition(&mut self, name: &str, type_id: TypeId) { + self.emitted.insert(type_id); let export = if self.config.export { "export " } else { "" }; let type_name = to_pascal_case(name); @@ -491,7 +407,6 @@ impl<'a> TsEmitter<'a> { 
self.emit_tagged_union(&type_name, variants, export); } _ => { - // For non-struct types, emit a type alias let ts_type = self.type_to_ts(type_id); self.output .push_str(&format!("{}type {} = {};\n\n", export, type_name, ts_type)); @@ -499,22 +414,12 @@ impl<'a> TsEmitter<'a> { } } - fn emit_custom_type_alias(&mut self, name: &str) { - let export = if self.config.export { "export " } else { "" }; - self.output - .push_str(&format!("{}type {} = Node;\n\n", export, name)); - } - fn emit_interface(&mut self, name: &str, fields: &BTreeMap, export: &str) { self.output .push_str(&format!("{}interface {} {{\n", export, name)); - // Sort fields by resolved name for deterministic output - let mut sorted_fields: Vec<_> = fields.iter().collect(); - sorted_fields.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); - - for (&field_sym, info) in sorted_fields { - let field_name = self.interner.resolve(field_sym); + for (&sym, info) in self.sort_map_by_name(fields) { + let field_name = self.interner.resolve(sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" 
} else { "" }; self.output @@ -527,12 +432,8 @@ impl<'a> TsEmitter<'a> { fn emit_tagged_union(&mut self, name: &str, variants: &BTreeMap, export: &str) { let mut variant_types = Vec::new(); - // Sort variants by resolved name for deterministic output - let mut sorted_variants: Vec<_> = variants.iter().collect(); - sorted_variants.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); - - for (&variant_sym, &type_id) in sorted_variants { - let variant_name = self.interner.resolve(variant_sym); + for (&sym, &type_id) in self.sort_map_by_name(variants) { + let variant_name = self.interner.resolve(sym); let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); variant_types.push(variant_type_name.clone()); @@ -548,52 +449,33 @@ impl<'a> TsEmitter<'a> { .push_str(&format!("{}type {} = {};\n\n", export, name, union)); } - /// Inline a type as $data value (struct fields inlined, others as-is) - fn inline_data_type(&self, type_id: TypeId) -> String { - let Some(kind) = self.ctx.get_type(type_id) else { - return "unknown".to_string(); - }; - - match kind { - TypeKind::Struct(fields) => self.inline_struct(fields), - TypeKind::Void => "{}".to_string(), - _ => self.type_to_ts(type_id), - } + fn emit_custom_type_alias(&mut self, name: &str) { + let export = if self.config.export { "export " } else { "" }; + self.output + .push_str(&format!("{}type {} = Node;\n\n", export, name)); } - /// Emit a nested type by its generated name. 
- fn emit_nested_type(&mut self, type_id: TypeId, name: &str) { - if self.emitted.contains(&type_id) || type_id.is_builtin() { - return; - } - self.emitted.insert(type_id); - - let Some(kind) = self.ctx.get_type(type_id) else { - return; - }; - + fn emit_node_interface(&mut self) { let export = if self.config.export { "export " } else { "" }; - - match kind { - TypeKind::Struct(fields) => { - self.emit_interface(name, fields, export); - } - TypeKind::Enum(variants) => { - self.emit_tagged_union(name, variants, export); - } - _ => {} + if self.config.verbose_nodes { + self.output.push_str(&format!( + "{}interface Node {{\n kind: string;\n text: string;\n startPosition: {{ row: number; column: number }};\n endPosition: {{ row: number; column: number }};\n}}\n\n", + export + )); + } else { + self.output.push_str(&format!( + "{}interface Node {{\n kind: string;\n text: string;\n}}\n\n", + export + )); } } fn type_to_ts(&self, type_id: TypeId) -> String { - if type_id == TYPE_VOID { - return "void".to_string(); - } - if type_id == TYPE_NODE { - return "Node".to_string(); - } - if type_id == TYPE_STRING { - return "string".to_string(); + match type_id { + TYPE_VOID => return "void".to_string(), + TYPE_NODE => return "Node".to_string(), + TYPE_STRING => return "string".to_string(), + _ => {} } let Some(kind) = self.ctx.get_type(type_id) else { @@ -614,7 +496,6 @@ impl<'a> TsEmitter<'a> { self.inline_struct(fields) } } - TypeKind::Enum(variants) => { if let Some(name) = self.type_names.get(&type_id) { name.clone() @@ -622,7 +503,6 @@ impl<'a> TsEmitter<'a> { self.inline_enum(variants) } } - TypeKind::Array { element, non_empty } => { let elem_type = self.type_to_ts(*element); if *non_empty { @@ -631,11 +511,7 @@ impl<'a> TsEmitter<'a> { format!("{}[]", elem_type) } } - - TypeKind::Optional(inner) => { - let inner_type = self.type_to_ts(*inner); - format!("{} | null", inner_type) - } + TypeKind::Optional(inner) => format!("{} | null", self.type_to_ts(*inner)), } } @@ 
-644,13 +520,10 @@ impl<'a> TsEmitter<'a> { return "{}".to_string(); } - // Sort fields by resolved name for deterministic output - let mut sorted_fields: Vec<_> = fields.iter().collect(); - sorted_fields.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); - - let field_strs: Vec<_> = sorted_fields - .iter() - .map(|&(&sym, info)| { + let field_strs: Vec<_> = self + .sort_map_by_name(fields) + .into_iter() + .map(|(&sym, info)| { let name = self.interner.resolve(sym); let ts_type = self.type_to_ts(info.type_id); let optional = if info.optional { "?" } else { "" }; @@ -662,13 +535,10 @@ impl<'a> TsEmitter<'a> { } fn inline_enum(&self, variants: &BTreeMap) -> String { - // Sort variants by resolved name for deterministic output - let mut sorted_variants: Vec<_> = variants.iter().collect(); - sorted_variants.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); - - let variant_strs: Vec<_> = sorted_variants - .iter() - .map(|&(&sym, &type_id)| { + let variant_strs: Vec<_> = self + .sort_map_by_name(variants) + .into_iter() + .map(|(&sym, &type_id)| { let name = self.interner.resolve(sym); let data_type = self.type_to_ts(type_id); format!("{{ $tag: \"{}\"; $data: {} }}", name, data_type) @@ -677,9 +547,65 @@ impl<'a> TsEmitter<'a> { variant_strs.join(" | ") } + + fn inline_data_type(&self, type_id: TypeId) -> String { + let Some(kind) = self.ctx.get_type(type_id) else { + return "unknown".to_string(); + }; + + match kind { + TypeKind::Struct(fields) => self.inline_struct(fields), + TypeKind::Void => "{}".to_string(), + _ => self.type_to_ts(type_id), + } + } + + fn needs_generated_name(&self, kind: &TypeKind) -> bool { + matches!(kind, TypeKind::Struct(_) | TypeKind::Enum(_)) + } + + fn generate_contextual_name(&mut self, ctx: &NamingContext) -> String { + let base = if let Some(field) = &ctx.field_name { + format!("{}{}", to_pascal_case(&ctx.def_name), to_pascal_case(field)) + } else { + to_pascal_case(&ctx.def_name) + }; + self.unique_name(&base) + } + + fn 
generate_fallback_name(&mut self, kind: &TypeKind) -> String { + let base = match kind { + TypeKind::Struct(_) => "Struct", + TypeKind::Enum(_) => "Enum", + _ => "Type", + }; + self.unique_name(base) + } + + fn unique_name(&mut self, base: &str) -> String { + let base = to_pascal_case(base); + if self.used_names.insert(base.clone()) { + return base; + } + + let mut counter = 2; + loop { + let name = format!("{}{}", base, counter); + if self.used_names.insert(name.clone()) { + return name; + } + counter += 1; + } + } + + /// Helper to iterate map sorted by resolved symbol name. + fn sort_map_by_name<'b, T>(&self, map: &'b BTreeMap) -> Vec<(&'b Symbol, &'b T)> { + let mut items: Vec<_> = map.iter().collect(); + items.sort_by_key(|&(&sym, _)| self.interner.resolve(sym)); + items + } } -/// Convert a string to PascalCase. fn to_pascal_case(s: &str) -> String { let mut result = String::with_capacity(s.len()); let mut capitalize_next = true; @@ -694,16 +620,13 @@ fn to_pascal_case(s: &str) -> String { result.push(c); } } - result } -/// Convenience function to emit TypeScript from a TypeContext. pub fn emit_typescript(ctx: &TypeContext, interner: &Interner) -> String { TsEmitter::new(ctx, interner, EmitConfig::default()).emit() } -/// Emit TypeScript with custom configuration. pub fn emit_typescript_with_config( ctx: &TypeContext, interner: &Interner, diff --git a/crates/plotnik-lib/src/query/type_check/mod.rs b/crates/plotnik-lib/src/query/type_check/mod.rs index 1a5ba1ca..74cc553b 100644 --- a/crates/plotnik-lib/src/query/type_check/mod.rs +++ b/crates/plotnik-lib/src/query/type_check/mod.rs @@ -2,8 +2,6 @@ //! //! Computes both structural arity (for field validation) and data flow types //! (for TypeScript emission) in a single traversal. -//! -//! Provides arity validation and type inference for TypeScript emission. 
mod context; mod emit_ts; @@ -26,7 +24,7 @@ use std::collections::BTreeMap; use indexmap::IndexMap; use crate::diagnostics::Diagnostics; -use crate::parser::ast::Root; +use crate::parser::ast::{self, Root}; use crate::query::dependencies::DependencyAnalysis; use crate::query::source_map::SourceId; use crate::query::symbol_table::{SymbolTable, UNNAMED_DEF}; @@ -44,105 +42,140 @@ pub fn infer_types( dependency_analysis: &DependencyAnalysis, diag: &mut Diagnostics, ) -> TypeContext { - let mut ctx = TypeContext::new(); - - // Seed def mappings from DependencyAnalysis (avoids re-registration) - ctx.seed_defs( - dependency_analysis.def_names(), - dependency_analysis.name_to_def(), - ); - - // Mark recursive definitions before inference. - // A def is recursive if it's in an SCC with >1 member, or it references itself. - for scc in &dependency_analysis.sccs { - let is_recursive_scc = if scc.len() > 1 { - true - } else if let Some(name) = scc.first() - && let Some(body) = symbol_table.get(name) - { - body_references_self(body, name) - } else { - false - }; + let ctx = TypeContext::new(); + InferencePass { + ctx, + interner, + ast_map, + symbol_table, + dependency_analysis, + diag, + } + .run() +} - if is_recursive_scc { - for def_name in scc { - let sym = interner.intern(def_name); - if let Some(def_id) = ctx.get_def_id_sym(sym) { - ctx.mark_recursive(def_id); +struct InferencePass<'a> { + ctx: TypeContext, + interner: &'a mut Interner, + ast_map: &'a IndexMap, + symbol_table: &'a SymbolTable, + dependency_analysis: &'a DependencyAnalysis, + diag: &'a mut Diagnostics, +} + +impl<'a> InferencePass<'a> { + fn run(mut self) -> TypeContext { + // Avoid re-registration of definitions + self.ctx.seed_defs( + self.dependency_analysis.def_names(), + self.dependency_analysis.name_to_def(), + ); + + self.mark_recursion(); + self.process_sccs(); + self.process_orphans(); + + self.ctx + } + + /// Identify and mark recursive definitions. 
+ /// A def is recursive if it's in an SCC with >1 member, or it references itself directly. + fn mark_recursion(&mut self) { + for scc in &self.dependency_analysis.sccs { + if self.is_scc_recursive(scc) { + for def_name in scc { + let sym = self.interner.intern(def_name); + if let Some(def_id) = self.ctx.get_def_id_sym(sym) { + self.ctx.mark_recursive(def_id); + } } } } } - // Process definitions in SCC order (leaves first) - for scc in &dependency_analysis.sccs { - for def_name in scc { - // Get the source ID for this definition - let Some(source_id) = symbol_table.source_id(def_name) else { - continue; - }; + /// Process definitions in SCC order (leaves first). + fn process_sccs(&mut self) { + for scc in &self.dependency_analysis.sccs { + for def_name in scc { + if let Some(source_id) = self.symbol_table.source_id(def_name) { + self.infer_and_register(def_name, source_id); + } + } + } + } - let Some(root) = ast_map.get(&source_id) else { + /// Handle any definitions not in an SCC (safety net). 
+ fn process_orphans(&mut self) { + for (name, source_id, _body) in self.symbol_table.iter_full() { + // Skip if already processed + if self.ctx.get_def_type_by_name(self.interner, name).is_some() { continue; - }; + } + self.infer_and_register(name, source_id); + } + } - // Run inference on this root - infer_root(&mut ctx, interner, symbol_table, source_id, root, diag); + fn infer_and_register(&mut self, def_name: &str, source_id: SourceId) { + let Some(root) = self.ast_map.get(&source_id) else { + return; + }; - // Register the definition's output type - if let Some(body) = symbol_table.get(def_name) - && let Some(info) = ctx.get_term_info(body).cloned() - { - let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type_by_name(interner, def_name, type_id); - } + infer_root( + &mut self.ctx, + self.interner, + self.symbol_table, + source_id, + root, + self.diag, + ); + + // Register the definition's output type based on the inferred body flow + if let Some(body) = self.symbol_table.get(def_name) + && let Some(info) = self.ctx.get_term_info(body).cloned() + { + let type_id = self.flow_to_type_id(&info.flow); + self.ctx + .set_def_type_by_name(self.interner, def_name, type_id); } } - // Handle any definitions not in an SCC (shouldn't happen, but be safe) - for (name, source_id, _body) in symbol_table.iter_full() { - if ctx.get_def_type_by_name(interner, name).is_some() { - continue; + fn is_scc_recursive(&self, scc: &[String]) -> bool { + if scc.len() > 1 { + return true; } - let Some(root) = ast_map.get(&source_id) else { - continue; + let Some(name) = scc.first() else { + return false; }; - infer_root(&mut ctx, interner, symbol_table, source_id, root, diag); + let Some(body) = self.symbol_table.get(name) else { + return false; + }; - if let Some(body) = symbol_table.get(name) - && let Some(info) = ctx.get_term_info(body).cloned() - { - let type_id = flow_to_type_id(&mut ctx, &info.flow); - ctx.set_def_type_by_name(interner, name, type_id); - } + 
body_references_self(body, name) } - ctx -} - -/// Check if an expression body contains a reference to the given name. -fn body_references_self(body: &crate::parser::ast::Expr, name: &str) -> bool { - use crate::parser::ast::Ref; - for descendant in body.as_cst().descendants() { - if let Some(r) = Ref::cast(descendant) - && let Some(name_tok) = r.name() - && name_tok.text() == name - { - return true; + fn flow_to_type_id(&mut self, flow: &TypeFlow) -> TypeId { + match flow { + TypeFlow::Void => self.ctx.intern_struct(BTreeMap::new()), + TypeFlow::Scalar(id) | TypeFlow::Bubble(id) => *id, } } - false } -/// Convert a TypeFlow to a TypeId for storage. -fn flow_to_type_id(ctx: &mut TypeContext, flow: &TypeFlow) -> TypeId { - match flow { - TypeFlow::Void => ctx.intern_struct(BTreeMap::new()), - TypeFlow::Scalar(type_id) | TypeFlow::Bubble(type_id) => *type_id, - } +/// Check if an expression body contains a reference to the given name. +fn body_references_self(body: &ast::Expr, name: &str) -> bool { + body.as_cst().descendants().any(|descendant| { + let Some(r) = ast::Ref::cast(descendant) else { + return false; + }; + + let Some(name_tok) = r.name() else { + return false; + }; + + name_tok.text() == name + }) } /// Get the primary definition name (first non-underscore, or underscore if none). @@ -152,5 +185,6 @@ pub fn primary_def_name(symbol_table: &SymbolTable) -> &str { return name; } } + UNNAMED_DEF } diff --git a/crates/plotnik-lib/src/query/type_check/symbol.rs b/crates/plotnik-lib/src/query/type_check/symbol.rs index 8feadf41..5fbeeddf 100644 --- a/crates/plotnik-lib/src/query/type_check/symbol.rs +++ b/crates/plotnik-lib/src/query/type_check/symbol.rs @@ -1,31 +1,22 @@ -//! Symbol interning and definition identifiers. -//! -//! `Symbol` and `Interner` are re-exported from `plotnik_core`. -//! `DefId` identifies named definitions (like `Foo = ...`) by stable index. - pub use plotnik_core::{Interner, Symbol}; /// A lightweight handle to a named definition. 
/// -/// Assigned during dependency analysis. Enables O(1) lookup of definition -/// properties without string comparison. +/// Assigned during dependency analysis. #[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct DefId(u32); impl DefId { - /// Create a DefId from a raw index. Use only for deserialization. #[inline] pub fn from_raw(index: u32) -> Self { Self(index) } - /// Raw index for serialization/debugging. #[inline] pub fn as_u32(self) -> u32 { self.0 } - /// Index for array access. #[inline] pub fn index(self) -> usize { self.0 as usize diff --git a/crates/plotnik-lib/src/query/type_check/types.rs b/crates/plotnik-lib/src/query/type_check/types.rs index a553c010..cb724937 100644 --- a/crates/plotnik-lib/src/query/type_check/types.rs +++ b/crates/plotnik-lib/src/query/type_check/types.rs @@ -1,70 +1,57 @@ //! Core type definitions for the type checking pass. //! //! The type system tracks two orthogonal properties: -//! - Arity: Whether an expression matches one or many node positions (for field validation) -//! - TypeFlow: What data flows through an expression (for TypeScript emission) +//! - Arity: Whether an expression matches one or many node positions. +//! - TypeFlow: What data flows through an expression. use std::collections::BTreeMap; use super::symbol::{DefId, Symbol}; -/// Interned type identifier. Types are stored in TypeContext and referenced by ID. +/// Interned type identifier. #[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] pub struct TypeId(pub u32); -/// Void type - produces nothing, transparent pub const TYPE_VOID: TypeId = TypeId(0); -/// Node type - a tree-sitter node pub const TYPE_NODE: TypeId = TypeId(1); -/// String type - extracted text from a node via `:: string` pub const TYPE_STRING: TypeId = TypeId(2); impl TypeId { pub fn is_builtin(self) -> bool { - self.0 <= 2 + self.0 <= TYPE_STRING.0 } } /// The kind of a type, determining its structure. 
#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub enum TypeKind { - /// Produces nothing, transparent to parent scope + /// Produces nothing, transparent to parent scope. Void, - /// A tree-sitter node + /// A tree-sitter node. Node, - /// Extracted text from a node + /// Extracted text from a node. String, - /// User-specified type via `@x :: TypeName` + /// User-specified type via `@x :: TypeName`. Custom(Symbol), - /// Object with named fields (keys are interned Symbols) + /// Object with named fields. Struct(BTreeMap), - /// Tagged union from labeled alternations (keys are interned Symbols) + /// Tagged union from labeled alternations. Enum(BTreeMap), - /// Array type with element type + /// Array type with element type. Array { element: TypeId, non_empty: bool }, - /// Optional wrapper + /// Optional wrapper. Optional(TypeId), - /// Forward reference to a recursive type (resolved DefId) + /// Forward reference to a recursive type. Ref(DefId), } impl TypeKind { pub fn is_void(&self) -> bool { - matches!(self, TypeKind::Void) + matches!(self, Self::Void) } pub fn is_scalar(&self) -> bool { - matches!( - self, - TypeKind::Node - | TypeKind::String - | TypeKind::Custom(_) - | TypeKind::Struct(_) - | TypeKind::Enum(_) - | TypeKind::Array { .. } - | TypeKind::Optional(_) - | TypeKind::Ref(_) - ) + !self.is_void() } } @@ -98,6 +85,57 @@ impl FieldInfo { } } +/// Structural arity - whether an expression matches one or many positions. +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Arity { + /// Exactly one node position. + One, + /// Multiple sequential positions. + Many, +} + +impl Arity { + /// Combine arities: Many wins. + pub fn combine(self, other: Self) -> Self { + if self == Self::One && other == Self::One { + return Self::One; + } + Self::Many + } +} + +/// Data flow through an expression. +#[derive(Clone, Debug)] +pub enum TypeFlow { + /// Transparent, produces nothing. + Void, + /// Opaque single value that doesn't bubble (scope boundary). 
+ Scalar(TypeId), + /// Struct type whose fields bubble to parent scope. + Bubble(TypeId), +} + +impl TypeFlow { + pub fn is_void(&self) -> bool { + matches!(self, Self::Void) + } + + pub fn is_scalar(&self) -> bool { + matches!(self, Self::Scalar(_)) + } + + pub fn is_bubble(&self) -> bool { + matches!(self, Self::Bubble(_)) + } + + pub fn type_id(&self) -> Option { + match self { + Self::Void => None, + Self::Scalar(id) | Self::Bubble(id) => Some(*id), + } + } +} + /// Combined arity and type flow information for an expression. #[derive(Clone, Debug)] pub struct TermInfo { @@ -139,85 +177,24 @@ impl TermInfo { } } -/// Structural arity - whether an expression matches one or many positions. -/// -/// Used for field validation: `field: expr` requires `expr` to have `Arity::One`. -#[derive(Copy, Clone, PartialEq, Eq, Debug)] -pub enum Arity { - /// Exactly one node position - One, - /// Multiple sequential positions - Many, -} - -impl Arity { - /// Combine arities: Many wins - pub fn combine(self, other: Arity) -> Arity { - match (self, other) { - (Arity::One, Arity::One) => Arity::One, - _ => Arity::Many, - } - } -} - -/// Data flow through an expression. -/// -/// Determines what data an expression contributes to output: -/// - Void: Transparent, produces nothing (used for structural matching) -/// - Scalar: Opaque single value that doesn't bubble (scope boundary) -/// - Bubble: Struct type whose fields bubble to parent scope -#[derive(Clone, Debug)] -pub enum TypeFlow { - /// Transparent, produces nothing - Void, - /// Opaque single value that doesn't bubble - Scalar(TypeId), - /// Struct type with fields that bubble to parent scope. - /// The TypeId must point to a TypeKind::Struct. 
- Bubble(TypeId), -} - -impl TypeFlow { - pub fn is_void(&self) -> bool { - matches!(self, TypeFlow::Void) - } - - pub fn is_scalar(&self) -> bool { - matches!(self, TypeFlow::Scalar(_)) - } - - pub fn is_bubble(&self) -> bool { - matches!(self, TypeFlow::Bubble(_)) - } - - /// Get the TypeId if this is Scalar or Bubble - pub fn type_id(&self) -> Option { - match self { - TypeFlow::Void => None, - TypeFlow::Scalar(id) | TypeFlow::Bubble(id) => Some(*id), - } - } -} - -/// Quantifier kind for type inference +/// Quantifier kind for type inference. #[derive(Copy, Clone, PartialEq, Eq, Debug)] pub enum QuantifierKind { - /// `?` or `??` - zero or one, no dimensionality added + /// `?` or `??` - zero or one. Optional, - /// `*` or `*?` - zero or more, adds dimensionality + /// `*` or `*?` - zero or more. ZeroOrMore, - /// `+` or `+?` - one or more, adds dimensionality + /// `+` or `+?` - one or more. OneOrMore, } impl QuantifierKind { - /// Whether this quantifier requires strict dimensionality (row capture for internal captures) + /// Whether this quantifier requires strict dimensionality (row capture). pub fn requires_row_capture(self) -> bool { - matches!(self, QuantifierKind::ZeroOrMore | QuantifierKind::OneOrMore) + matches!(self, Self::ZeroOrMore | Self::OneOrMore) } - /// Whether the resulting array is non-empty pub fn is_non_empty(self) -> bool { - matches!(self, QuantifierKind::OneOrMore) + matches!(self, Self::OneOrMore) } } diff --git a/crates/plotnik-lib/src/query/type_check/unify.rs b/crates/plotnik-lib/src/query/type_check/unify.rs index b5ffd207..f44014b5 100644 --- a/crates/plotnik-lib/src/query/type_check/unify.rs +++ b/crates/plotnik-lib/src/query/type_check/unify.rs @@ -7,7 +7,7 @@ use std::collections::BTreeMap; use super::context::TypeContext; use super::symbol::Symbol; -use super::types::{FieldInfo, TYPE_NODE, TYPE_VOID, TypeFlow, TypeId}; +use super::types::{FieldInfo, TYPE_VOID, TypeFlow, TypeId}; /// Error during type unification. 
#[derive(Clone, Debug)] @@ -33,17 +33,36 @@ impl UnifyError { } } +/// Unify multiple flows from alternation branches. +pub fn unify_flows( + ctx: &mut TypeContext, + flows: impl IntoIterator, +) -> Result { + let mut iter = flows.into_iter(); + let Some(first) = iter.next() else { + return Ok(TypeFlow::Void); + }; + + iter.try_fold(first, |acc, flow| unify_flow(ctx, acc, flow)) +} + /// Unify two TypeFlows from alternation branches. /// /// Rules: /// - Void ∪ Void → Void /// - Void ∪ Bubble(s) → Bubble(make_all_optional(s)) /// - Bubble(a) ∪ Bubble(b) → Bubble(merge_fields(a, b)) -/// - Scalar in untagged → Error (use tagged alternation instead) +/// - Scalar in untagged → Error pub fn unify_flow(ctx: &mut TypeContext, a: TypeFlow, b: TypeFlow) -> Result { + // Untagged alternations cannot contain scalars. + if matches!(a, TypeFlow::Scalar(_)) || matches!(b, TypeFlow::Scalar(_)) { + return Err(UnifyError::ScalarInUntagged); + } + match (a, b) { (TypeFlow::Void, TypeFlow::Void) => Ok(TypeFlow::Void), + // Void ∪ Bubble -> Bubble (all fields become optional) (TypeFlow::Void, TypeFlow::Bubble(id)) | (TypeFlow::Bubble(id), TypeFlow::Void) => { let fields = ctx.get_struct_fields(id).cloned().unwrap_or_default(); let optional_fields = make_all_optional(fields); @@ -53,28 +72,16 @@ pub fn unify_flow(ctx: &mut TypeContext, a: TypeFlow, b: TypeFlow) -> Result { let a_fields = ctx.get_struct_fields(a_id).cloned().unwrap_or_default(); let b_fields = ctx.get_struct_fields(b_id).cloned().unwrap_or_default(); + let merged = merge_fields(a_fields, b_fields)?; Ok(TypeFlow::Bubble(ctx.intern_struct(merged))) } - // Scalars can't appear in untagged alternations - (TypeFlow::Scalar(_), _) | (_, TypeFlow::Scalar(_)) => Err(UnifyError::ScalarInUntagged), + // Should be unreachable due to initial scalar check, but technically possible if new variants are added + _ => Err(UnifyError::ScalarInUntagged), } } -/// Unify multiple flows from alternation branches. 
-pub fn unify_flows( - ctx: &mut TypeContext, - flows: impl IntoIterator, -) -> Result { - let mut iter = flows.into_iter(); - let Some(first) = iter.next() else { - return Ok(TypeFlow::Void); - }; - - iter.try_fold(first, |acc, flow| unify_flow(ctx, acc, flow)) -} - /// Make all fields in a map optional. fn make_all_optional(fields: BTreeMap) -> BTreeMap { fields @@ -86,39 +93,29 @@ fn make_all_optional(fields: BTreeMap) -> BTreeMap, - b: BTreeMap, + mut b: BTreeMap, ) -> Result, UnifyError> { let mut result = BTreeMap::new(); - // Process all keys from a - for (key, a_info) in &a { - if let Some(b_info) = b.get(key) { + // Process all keys from 'a'. Check intersection with 'b'. + for (key, a_info) in a { + if let Some(b_info) = b.remove(&key) { // Key exists in both: unify types - let unified_type = unify_type_ids(a_info.type_id, b_info.type_id, *key)?; + let type_id = unify_type_ids(a_info.type_id, b_info.type_id, key)?; let optional = a_info.optional || b_info.optional; - result.insert( - *key, - FieldInfo { - type_id: unified_type, - optional, - }, - ); + result.insert(key, FieldInfo { type_id, optional }); } else { - // Key only in a: make optional - result.insert(*key, a_info.make_optional()); + // Key only in 'a': make optional + result.insert(key, a_info.make_optional()); } } - // Process keys only in b - for (key, b_info) in b { - if !a.contains_key(&key) { - result.insert(key, b_info.make_optional()); - } - } + // Remaining keys in 'b' were not in 'a': make optional + result.extend(make_all_optional(b)); Ok(result) } @@ -126,17 +123,11 @@ fn merge_fields( /// Unify two type IDs. /// /// For now, types must match exactly (except Node is compatible with Node). -/// Future: could allow structural subtyping for structs. 
fn unify_type_ids(a: TypeId, b: TypeId, field: Symbol) -> Result { if a == b { return Ok(a); } - // Both are Node type - compatible - if a == TYPE_NODE && b == TYPE_NODE { - return Ok(TYPE_NODE); - } - // Void is compatible with anything (treat as identity) if a == TYPE_VOID { return Ok(b); @@ -152,6 +143,7 @@ fn unify_type_ids(a: TypeId, b: TypeId, field: Symbol) -> Result Date: Mon, 22 Dec 2025 18:14:11 -0300 Subject: [PATCH 16/18] Fix TypeScript type emit order for tagged unions --- .../src/query/type_check/emit_ts.rs | 2 +- .../plotnik-lib/src/query/type_check_tests.rs | 48 +++++++++---------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index 3c9fd83b..44d75568 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -432,7 +432,7 @@ impl<'a> TsEmitter<'a> { fn emit_tagged_union(&mut self, name: &str, variants: &BTreeMap, export: &str) { let mut variant_types = Vec::new(); - for (&sym, &type_id) in self.sort_map_by_name(variants) { + for (&sym, &type_id) in variants { let variant_name = self.interner.resolve(sym); let variant_type_name = format!("{}{}", name, to_pascal_case(variant_name)); variant_types.push(variant_type_name.clone()); diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs index 492477d1..24e80db4 100644 --- a/crates/plotnik-lib/src/query/type_check_tests.rs +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -364,17 +364,17 @@ fn tagged_alt_basic() { text: string; } - export interface QNum { - $tag: "Num"; - $data: { n: Node }; - } - export interface QStr { $tag: "Str"; $data: { s: Node }; } - export type Q = QNum | QStr; + export interface QNum { + $tag: "Num"; + $data: { n: Node }; + } + + export type Q = QStr | QNum; "#); } @@ -395,17 +395,17 @@ fn tagged_alt_with_type_annotation() { text: 
string; } - export interface QNum { - $tag: "Num"; - $data: { n: Node }; - } - export interface QStr { $tag: "Str"; $data: { s: string }; } - export type Q = QNum | QStr; + export interface QNum { + $tag: "Num"; + $data: { n: Node }; + } + + export type Q = QStr | QNum; "#); } @@ -426,17 +426,17 @@ fn tagged_alt_captured() { text: string; } - export interface QResultNum { - $tag: "Num"; - $data: { n: Node }; - } - export interface QResultStr { $tag: "Str"; $data: { s: Node }; } - export type QResult = QResultNum | QResultStr; + export interface QResultNum { + $tag: "Num"; + $data: { n: Node }; + } + + export type QResult = QResultStr | QResultNum; export interface Q { result: QResult; @@ -541,17 +541,17 @@ fn recursive_type_with_alternation() { let res = Query::expect_valid_types(input); insta::assert_snapshot!(res, @r#" - export interface ExprBinary { - $tag: "Binary"; - $data: { left: Expr; right: Expr }; - } - export interface ExprLit { $tag: "Lit"; $data: { value: string }; } - export type Expr = ExprBinary | ExprLit; + export interface ExprBinary { + $tag: "Binary"; + $data: { left: Expr; right: Expr }; + } + + export type Expr = ExprLit | ExprBinary; "#); } From 3a2695bd51c9ce8baae5709a84d298120ef9844d Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 18:38:45 -0300 Subject: [PATCH 17/18] Finish --- AGENTS.md | 49 ++++-- crates/plotnik-cli/src/commands/debug/mod.rs | 23 ++- crates/plotnik-cli/src/commands/exec.rs | 4 +- crates/plotnik-cli/src/commands/types.rs | 36 ++-- .../src/query/type_check/context.rs | 12 +- .../src/query/type_check/emit_ts.rs | 41 +++-- .../plotnik-lib/src/query/type_check_tests.rs | 155 ++++++++++++------ 7 files changed, 218 insertions(+), 102 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 140b7883..29c0847a 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -154,7 +154,7 @@ Tree-sitter: `((a) (b))` — Plotnik: `{(a) (b)}`. The #1 syntax error. 
``` crates/ plotnik-cli/ # CLI tool - src/commands/ # Subcommands (debug, docs, exec, langs, types) + src/commands/ # Subcommands (debug, exec, langs, types) plotnik-core/ # Common code plotnik-lib/ # Plotnik as library src/ @@ -172,23 +172,44 @@ docs/ Run: `cargo run -p plotnik-cli -- ` -| Command | Purpose | -| ------- | ------------------------------- | -| `debug` | Inspect queries and source ASTs | -| `exec` | Execute query, output JSON | -| `types` | Generate TypeScript types | -| `langs` | List supported languages | +| Command | Purpose | Status | +| ------- | ------------------------------- | ------- | +| `debug` | Inspect queries and source ASTs | Working | +| `types` | Generate TypeScript types | Working | +| `langs` | List supported languages | Working | +| `exec` | Execute query, output JSON | Not yet | -Common: `-q/--query `, `--query-file `, `--source `, `-s/--source-file `, `-l/--lang ` +## debug -`debug`: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--graph`, `--graph-raw`, `--types` -`exec`: `--pretty`, `--verbose-nodes`, `--check`, `--entry ` -`types`: `--format `, `--root-type `, `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` +Inspect query AST/CST or parse source files with tree-sitter. ```sh -cargo run -p plotnik-cli -- debug -q '(identifier) @id' --graph -l javascript -cargo run -p plotnik-cli -- exec -q '(identifier) @id' -s app.js --pretty -cargo run -p plotnik-cli -- types -q '(identifier) @id' -l javascript -o types.d.ts +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --only-symbols +cargo run -p plotnik-cli -- debug -q 'Test = (identifier) @id' --types +cargo run -p plotnik-cli -- debug -s app.ts +cargo run -p plotnik-cli -- debug -s app.ts --raw +``` + +Options: `--only-symbols`, `--cst`, `--raw`, `--spans`, `--arities`, `--types` + +## types + +Generate TypeScript type definitions from a query. 
Requires `-l/--lang` to validate node types against grammar. + +```sh +cargo run -p plotnik-cli -- types -q 'Test = (identifier) @id' -l javascript +cargo run -p plotnik-cli -- types --query-file query.ptk -l typescript -o types.d.ts +``` + +Options: `--root-type `, `--verbose-nodes`, `--no-node-type`, `--no-export`, `-o ` + +## langs + +List supported tree-sitter languages. + +```sh +cargo run -p plotnik-cli -- langs ``` # Coding Rules diff --git a/crates/plotnik-cli/src/commands/debug/mod.rs b/crates/plotnik-cli/src/commands/debug/mod.rs index 5251720d..be2863c7 100644 --- a/crates/plotnik-cli/src/commands/debug/mod.rs +++ b/crates/plotnik-cli/src/commands/debug/mod.rs @@ -40,20 +40,13 @@ pub fn run(args: DebugArgs) { None }; - let mut query = query_source.as_ref().map(|src| { + let query = query_source.as_ref().map(|src| { Query::try_from(src.as_str()).unwrap_or_else(|e| { eprintln!("error: {}", e); std::process::exit(1); }) }); - // Auto-link when --lang is provided with a query - if args.lang.is_some() - && let Some(ref mut _q) = query - { - unimplemented!(); - } - let show_query = has_query_input && !args.symbols && !args.graph && !args.types; let show_source = has_source_input; @@ -81,11 +74,17 @@ pub fn run(args: DebugArgs) { ); } - // Build graph if needed for --graph, --graph-raw, or --types - if (args.graph || args.graph_raw || args.types) - && let Some(_) = query.take() + if args.graph || args.graph_raw { + eprintln!("error: --graph and --graph-raw are not yet implemented"); + std::process::exit(1); + } + + if args.types + && let Some(ref q) = query { - unimplemented!(); + let output = + plotnik_lib::query::type_check::emit_typescript(q.type_context(), q.interner()); + print!("{}", output); } if show_source { diff --git a/crates/plotnik-cli/src/commands/exec.rs b/crates/plotnik-cli/src/commands/exec.rs index d791e908..d99c8787 100644 --- a/crates/plotnik-cli/src/commands/exec.rs +++ b/crates/plotnik-cli/src/commands/exec.rs @@ -52,7 +52,9 @@ pub fn 
run(args: ExecArgs) { let _ = (args.pretty, args.verbose_nodes, args.check, args.entry); - todo!("IR emission and query execution not yet implemented") + eprintln!("error: query execution not yet implemented"); + eprintln!("hint: use `plotnik types` to generate TypeScript types from queries"); + std::process::exit(1); } fn load_query(args: &ExecArgs) -> String { diff --git a/crates/plotnik-cli/src/commands/types.rs b/crates/plotnik-cli/src/commands/types.rs index 8259d255..a913812c 100644 --- a/crates/plotnik-cli/src/commands/types.rs +++ b/crates/plotnik-cli/src/commands/types.rs @@ -1,11 +1,10 @@ -#![allow(dead_code)] - use std::fs; -use std::io::{self, Read}; +use std::io::{self, Read, Write}; use std::path::PathBuf; use plotnik_langs::Lang; use plotnik_lib::Query; +use plotnik_lib::query::type_check::{EmitConfig, emit_typescript_with_config}; pub struct TypesArgs { pub query_text: Option, @@ -32,7 +31,7 @@ pub fn run(args: TypesArgs) { } let lang = resolve_lang_required(&args.lang); - // Parse and validate query + // Parse and analyze query let query = Query::try_from(query_source.as_str()) .unwrap_or_else(|e| { eprintln!("error: {}", e); @@ -45,13 +44,25 @@ pub fn run(args: TypesArgs) { std::process::exit(1); } - // Link query against language - if !query.is_valid() { - eprint!("{}", query.diagnostics().render(query.source_map())); - std::process::exit(1); - } + // Emit TypeScript types + let config = EmitConfig { + export: args.export, + emit_node_type: !args.no_node_type, + root_type_name: args.root_type, + verbose_nodes: args.verbose_nodes, + }; - unimplemented!(); + let output = emit_typescript_with_config(query.type_context(), query.interner(), config); + + // Write output + if let Some(ref path) = args.output { + fs::write(path, &output).unwrap_or_else(|e| { + eprintln!("error: failed to write {}: {}", path.display(), e); + std::process::exit(1); + }); + } else { + io::stdout().write_all(output.as_bytes()).unwrap(); + } } fn load_query(args: &TypesArgs) 
-> String { @@ -66,7 +77,10 @@ fn load_query(args: &TypesArgs) -> String { .expect("failed to read stdin"); return buf; } - return fs::read_to_string(path).expect("failed to read query file"); + return fs::read_to_string(path).unwrap_or_else(|e| { + eprintln!("error: failed to read query file: {}", e); + std::process::exit(1); + }); } unreachable!("validation ensures query input exists") } diff --git a/crates/plotnik-lib/src/query/type_check/context.rs b/crates/plotnik-lib/src/query/type_check/context.rs index 4219f150..5f45ff94 100644 --- a/crates/plotnik-lib/src/query/type_check/context.rs +++ b/crates/plotnik-lib/src/query/type_check/context.rs @@ -211,11 +211,15 @@ impl TypeContext { self.types.len() } - /// Iterate over all definition types as (DefId, TypeId). + /// Iterate over all definition types as (DefId, TypeId) in DefId order. + /// DefId order corresponds to SCC processing order (leaves first). pub fn iter_def_types(&self) -> impl Iterator + '_ { - self.def_types - .iter() - .map(|(&def_id, &type_id)| (def_id, type_id)) + (0..self.def_names.len()).filter_map(|i| { + let def_id = DefId::from_raw(i as u32); + self.def_types + .get(&def_id) + .map(|&type_id| (def_id, type_id)) + }) } /// Number of registered definitions. 
diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index 44d75568..f798d2b8 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -79,17 +79,19 @@ impl<'a> TsEmitter<'a> { pub fn emit(mut self) -> String { self.prepare_emission(); - // Collect definition names for lookup - let def_names: HashMap = self - .ctx - .iter_def_types() - .map(|(def_id, type_id)| { - ( - type_id, - self.ctx.def_name(self.interner, def_id).to_string(), - ) - }) - .collect(); + // Collect all definitions, tracking primary name per TypeId and aliases + let mut primary_names: HashMap = HashMap::new(); + let mut aliases: Vec<(String, TypeId)> = Vec::new(); + + for (def_id, type_id) in self.ctx.iter_def_types() { + let name = self.ctx.def_name(self.interner, def_id).to_string(); + if primary_names.contains_key(&type_id) { + // This TypeId already has a primary definition; this becomes an alias + aliases.push((name, type_id)); + } else { + primary_names.insert(type_id, name); + } + } // Collect all reachable types starting from definitions let mut to_emit = HashSet::new(); @@ -99,13 +101,20 @@ impl<'a> TsEmitter<'a> { // Emit in topological order for type_id in self.sort_topologically(to_emit) { - if let Some(def_name) = def_names.get(&type_id) { + if let Some(def_name) = primary_names.get(&type_id) { self.emit_type_definition(def_name, type_id); } else { self.emit_generated_or_custom(type_id); } } + // Emit type aliases for definitions that share a TypeId with another definition + for (alias_name, type_id) in aliases { + if let Some(primary_name) = primary_names.get(&type_id) { + self.emit_type_alias(&alias_name, primary_name); + } + } + self.output } @@ -455,6 +464,14 @@ impl<'a> TsEmitter<'a> { .push_str(&format!("{}type {} = Node;\n\n", export, name)); } + fn emit_type_alias(&mut self, alias_name: &str, target_name: &str) { + let export = if 
self.config.export { "export " } else { "" }; + self.output.push_str(&format!( + "{}type {} = {};\n\n", + export, alias_name, target_name + )); + } + fn emit_node_interface(&mut self) { let export = if self.config.export { "export " } else { "" }; if self.config.verbose_nodes { diff --git a/crates/plotnik-lib/src/query/type_check_tests.rs b/crates/plotnik-lib/src/query/type_check_tests.rs index 24e80db4..c1d8bab8 100644 --- a/crates/plotnik-lib/src/query/type_check_tests.rs +++ b/crates/plotnik-lib/src/query/type_check_tests.rs @@ -1,6 +1,65 @@ use crate::Query; use indoc::indoc; +#[test] +fn multiple_definitions_all_emitted() { + let input = indoc! {r#" + Id = (identifier) @id + Foo = (function_declaration name: (Id)) + Bar = (class_declaration name: (Id)) + "#}; + + let res = Query::expect_valid_types(input); + + // All three definitions emitted: Id as primary, Foo and Bar as aliases + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Id { + id: Node; + } + + export type Foo = Id; + + export type Bar = Id; + "); +} + +#[test] +fn multiple_definitions_distinct_types() { + let input = indoc! {r#" + Name = (identifier) @name + Value = (number) @value + Both = (pair (identifier) @key (number) @val) + "#}; + + let res = Query::expect_valid_types(input); + + // All three definitions emitted with their own types + insta::assert_snapshot!(res, @r" + export interface Node { + kind: string; + text: string; + } + + export interface Both { + key: Node; + val: Node; + } + + export interface Value { + value: Node; + } + + export interface Name { + name: Node; + } + "); +} + #[test] fn capture_single_node() { let input = "Q = (identifier) @name"; @@ -73,10 +132,10 @@ fn named_node_with_field_capture() { #[test] fn named_node_multiple_field_captures() { let input = indoc! 
{r#" - Q = (function - name: (identifier) @name - body: (block) @body - ) + Q = (function + name: (identifier) @name + body: (block) @body + ) "#}; let res = Query::expect_valid_types(input); @@ -97,9 +156,9 @@ fn named_node_multiple_field_captures() { #[test] fn nested_named_node_captures() { let input = indoc! {r#" - Q = (call - function: (member target: (identifier) @target) - ) + Q = (call + function: (member target: (identifier) @target) + ) "#}; let res = Query::expect_valid_types(input); @@ -155,7 +214,7 @@ fn scalar_list_one_or_more() { #[test] fn row_list_basic() { let input = indoc! {r#" - Q = {(key) @k (value) @v}* @rows + Q = {(key) @k (value) @v}* @rows "#}; let res = Query::expect_valid_types(input); @@ -180,7 +239,7 @@ fn row_list_basic() { #[test] fn row_list_non_empty() { let input = indoc! {r#" - Q = {(key) @k (value) @v}+ @rows + Q = {(key) @k (value) @v}+ @rows "#}; let res = Query::expect_valid_types(input); @@ -223,7 +282,7 @@ fn optional_single_capture() { #[test] fn optional_group_bubbles_fields() { let input = indoc! {r#" - Q = {(modifier) @mod (decorator) @dec}? + Q = {(modifier) @mod (decorator) @dec}? "#}; let res = Query::expect_valid_types(input); @@ -243,7 +302,7 @@ fn optional_group_bubbles_fields() { #[test] fn sequence_merges_fields() { let input = indoc! {r#" - Q = {(a) @a (b) @b} + Q = {(a) @a (b) @b} "#}; let res = Query::expect_valid_types(input); @@ -264,7 +323,7 @@ fn sequence_merges_fields() { #[test] fn captured_sequence_creates_struct() { let input = indoc! {r#" - Q = {(a) @a (b) @b} @row + Q = {(a) @a (b) @b} @row "#}; let res = Query::expect_valid_types(input); @@ -326,10 +385,10 @@ fn untagged_alt_different_captures() { #[test] fn untagged_alt_partial_overlap() { let input = indoc! 
{r#" - Q = [ - {(a) @x (b) @y} - {(a) @x} - ] + Q = [ + {(a) @x (b) @y} + {(a) @x} + ] "#}; let res = Query::expect_valid_types(input); @@ -350,10 +409,10 @@ fn untagged_alt_partial_overlap() { #[test] fn tagged_alt_basic() { let input = indoc! {r#" - Q = [ - Str: (string) @s - Num: (number) @n - ] + Q = [ + Str: (string) @s + Num: (number) @n + ] "#}; let res = Query::expect_valid_types(input); @@ -381,10 +440,10 @@ fn tagged_alt_basic() { #[test] fn tagged_alt_with_type_annotation() { let input = indoc! {r#" - Q = [ - Str: (string) @s :: string - Num: (number) @n - ] + Q = [ + Str: (string) @s :: string + Num: (number) @n + ] "#}; let res = Query::expect_valid_types(input); @@ -412,10 +471,10 @@ fn tagged_alt_with_type_annotation() { #[test] fn tagged_alt_captured() { let input = indoc! {r#" - Q = [ - Str: (string) @s - Num: (number) @n - ] @result + Q = [ + Str: (string) @s + Num: (number) @n + ] @result "#}; let res = Query::expect_valid_types(input); @@ -447,10 +506,10 @@ fn tagged_alt_captured() { #[test] fn nested_captured_group() { let input = indoc! {r#" - Q = { - (identifier) @name - {(key) @k (value) @v} @pair - } + Q = { + (identifier) @name + {(key) @k (value) @v} @pair + } "#}; let res = Query::expect_valid_types(input); @@ -476,7 +535,7 @@ fn nested_captured_group() { #[test] fn error_star_with_internal_captures_no_row() { let input = indoc! {r#" - Bad = {(a) @a (b) @b}* + Bad = {(a) @a (b) @b}* "#}; let res = Query::expect_invalid(input); @@ -494,7 +553,7 @@ fn error_star_with_internal_captures_no_row() { #[test] fn error_plus_with_internal_capture_no_row() { let input = indoc! {r#" - Bad = {(c) @c}+ + Bad = {(c) @c}+ "#}; let res = Query::expect_invalid(input); @@ -512,7 +571,7 @@ fn error_plus_with_internal_capture_no_row() { #[test] fn error_named_node_with_capture_quantified() { let input = indoc! 
{r#" - Bad = (func (identifier) @name)* + Bad = (func (identifier) @name)* "#}; let res = Query::expect_invalid(input); @@ -530,12 +589,12 @@ fn error_named_node_with_capture_quantified() { #[test] fn recursive_type_with_alternation() { let input = indoc! {r#" - Expr = [ - Lit: (number) @value ::string - Binary: (binary_expression - left: (Expr) @left - right: (Expr) @right) - ] + Expr = [ + Lit: (number) @value ::string + Binary: (binary_expression + left: (Expr) @left + right: (Expr) @right) + ] "#}; let res = Query::expect_valid_types(input); @@ -558,12 +617,12 @@ fn recursive_type_with_alternation() { #[test] fn recursive_type_optional_self_ref() { let input = indoc! {r#" - NestedCall = (call_expression - function: [ - (identifier) @name - (NestedCall) @inner - ] - ) + NestedCall = (call_expression + function: [ + (identifier) @name + (NestedCall) @inner + ] + ) "#}; let res = Query::expect_valid_types(input); @@ -584,7 +643,7 @@ fn recursive_type_optional_self_ref() { #[test] fn recursive_type_in_quantified_context() { let input = indoc! {r#" - Item = (item (Item)* @children) + Item = (item (Item)* @children) "#}; let res = Query::expect_valid_types(input); From eb0f365c204e4b35ee885f196978f53557eaabed Mon Sep 17 00:00:00 2001 From: Sergei Zharinov Date: Mon, 22 Dec 2025 19:51:12 -0300 Subject: [PATCH 18/18] Fix clippy --- crates/plotnik-lib/src/query/type_check/emit_ts.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/crates/plotnik-lib/src/query/type_check/emit_ts.rs b/crates/plotnik-lib/src/query/type_check/emit_ts.rs index f798d2b8..3f30a4cb 100644 --- a/crates/plotnik-lib/src/query/type_check/emit_ts.rs +++ b/crates/plotnik-lib/src/query/type_check/emit_ts.rs @@ -3,6 +3,7 @@ //! Converts inferred types to TypeScript declarations. //! Used as a test oracle to verify type inference correctness. 
+use std::collections::hash_map::Entry; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; use plotnik_core::Interner; @@ -85,11 +86,11 @@ impl<'a> TsEmitter<'a> { for (def_id, type_id) in self.ctx.iter_def_types() { let name = self.ctx.def_name(self.interner, def_id).to_string(); - if primary_names.contains_key(&type_id) { + if let Entry::Vacant(e) = primary_names.entry(type_id) { + e.insert(name); + } else { // This TypeId already has a primary definition; this becomes an alias aliases.push((name, type_id)); - } else { - primary_names.insert(type_id, name); } }