From f4b3714c15eef892abe4c733e056f6aeeedb32eb Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Thu, 18 Jun 2026 22:11:39 +0000 Subject: [PATCH 1/3] Speed up Tree-sitter Ruby syntax facade Co-authored-by: OpenAI Codex --- .../lib/nil_kill/source_index/observations.rb | 71 ++- .../nil_kill/source_index/param_protocols.rb | 9 +- gems/nil-kill/lib/nil_kill/syntax.rb | 459 ++++++++++-------- gems/nil-kill/spec/source_index_spec.rb | 27 ++ 4 files changed, 362 insertions(+), 204 deletions(-) diff --git a/gems/nil-kill/lib/nil_kill/source_index/observations.rb b/gems/nil-kill/lib/nil_kill/source_index/observations.rb index 6273f69c6..61fb84020 100644 --- a/gems/nil-kill/lib/nil_kill/source_index/observations.rb +++ b/gems/nil-kill/lib/nil_kill/source_index/observations.rb @@ -217,9 +217,35 @@ def collect_return_usage_site_context(node, context, current_method, current_han end def collect_hash_record_escape_sites!(root) - each_ast(root) do |node| + nodes, parents = ast_nodes_with_parents(root) + collection_values = {}.compare_by_identity + local_escape_names = Set.new + + nodes.each do |node| + if node.is_a?(Syntax::CallNode) + args = node.arguments&.arguments || [] + args.each do |arg| + collection_values[arg] = true if COLLECTION_APPEND_METHODS.include?(node.name.to_s) + local_escape_names << arg.name.to_s if arg.is_a?(Syntax::LocalVariableReadNode) + end + if node.name.to_s == "[]=" && node.arguments + last = node.arguments.arguments.last + collection_values[last] = true if last + end + elsif node.is_a?(Syntax::IndexOperatorWriteNode) || node.is_a?(Syntax::IndexAndWriteNode) || + node.is_a?(Syntax::IndexOrWriteNode) + value = node.respond_to?(:value) ? 
node.value : nil + collection_values[value] = true if value + elsif node.is_a?(Syntax::ArrayNode) + node.elements.each do |element| + local_escape_names << element.name.to_s if element.is_a?(Syntax::LocalVariableReadNode) + end + end + end + + nodes.each do |node| next unless node.is_a?(Syntax::HashNode) - reason = hash_record_escape_reason(root, node) + reason = indexed_hash_record_escape_reason(node, parents, collection_values, local_escape_names) next unless reason @hash_record_escape_sites << TypedRecords::HashRecordEscapeSiteRecord.new( path: @rel, @@ -231,6 +257,47 @@ def collect_hash_record_escape_sites!(root) end end + def ast_nodes_with_parents(root) + nodes = [] + parents = {}.compare_by_identity + stack = [[root, nil]] + until stack.empty? + node, parent = stack.pop + next unless node.is_a?(Syntax::Node) + nodes << node + parents[node] = parent if parent + node.compact_child_nodes.reverse_each { |child| stack << [child, node] } + end + [nodes, parents] + end + + def indexed_hash_record_escape_reason(hash_node, parents, collection_values, local_escape_names) + return "array_literal" if ancestor_node?(hash_node, parents, Syntax::ArrayNode) + return "collection_append_or_index_write" if collection_values.key?(hash_node) + writer = enclosing_local_write_for_hash(hash_node, parents) + return nil unless writer + + local_escape_names.include?(writer.name.to_s) ? 
"local_alias_escape" : nil + end + + def ancestor_node?(node, parents, klass) + parent = parents[node] + while parent + return true if parent.is_a?(klass) + parent = parents[parent] + end + false + end + + def enclosing_local_write_for_hash(hash_node, parents) + parent = parents[hash_node] + while parent + return parent if parent.is_a?(Syntax::LocalVariableWriteNode) && parent.value.equal?(hash_node) + parent = parents[parent] + end + nil + end + def hash_record_escape_reason(root, hash_node) return "array_literal" if hash_literal_in_array_literal?(root, hash_node) return "collection_append_or_index_write" if value_in_collection_append_or_index_write?(root, hash_node) diff --git a/gems/nil-kill/lib/nil_kill/source_index/param_protocols.rb b/gems/nil-kill/lib/nil_kill/source_index/param_protocols.rb index 69ff41efa..44dd79af0 100644 --- a/gems/nil-kill/lib/nil_kill/source_index/param_protocols.rb +++ b/gems/nil-kill/lib/nil_kill/source_index/param_protocols.rb @@ -12,11 +12,14 @@ def inspect_param_origins(node, scope) next unless assoc.respond_to?(:key) && assoc.respond_to?(:value) key = hash_key_name(assoc.key) next unless key - @param_origins << param_origin_record(node, assoc.value, callee, :keyword, key, scope) - record_callsite_hash_shape(callee, :keyword, key, assoc.value) - record_callsite_array_element_shape(callee, :keyword, key, assoc.value) + value = assoc.value + next unless value + @param_origins << param_origin_record(node, value, callee, :keyword, key, scope) + record_callsite_hash_shape(callee, :keyword, key, value) + record_callsite_array_element_shape(callee, :keyword, key, value) end else + next unless arg @param_origins << param_origin_record(node, arg, callee, :positional, idx, scope) record_callsite_hash_shape(callee, :positional, idx, arg) record_callsite_array_element_shape(callee, :positional, idx, arg) diff --git a/gems/nil-kill/lib/nil_kill/syntax.rb b/gems/nil-kill/lib/nil_kill/syntax.rb index cdffcf5b6..b30c88914 100644 --- 
a/gems/nil-kill/lib/nil_kill/syntax.rb +++ b/gems/nil-kill/lib/nil_kill/syntax.rb @@ -72,6 +72,8 @@ def length end class Context + EMPTY_SET = Set.new.freeze + attr_reader :source, :root, :path def initialize(source, root, path) @@ -79,15 +81,23 @@ def initialize(source, root, path) @root = root @path = path @cache = {} + @node_class_cache = {} + @children_cache = {} + @named_children_cache = {} + @named_field_cache = {} @locals_by_scope = {} - scan_scopes(root) + @scope_parent = {} + @scope_for_node = {} + @effective_locals_by_scope = {} + scan_scopes(root, []) + build_effective_locals! end def wrap(raw, force: nil) return nil unless raw return nil if raw.respond_to?(:named?) && !raw.named? && force.nil? - klass = force || node_class(raw) + klass = force || cached_node_class(raw) return nil unless klass key = [raw.start_byte, raw.end_byte, raw.kind, klass.name] @@ -110,21 +120,34 @@ def slice(raw) end def named_field(raw, name) - raw.child_by_field_name(name) + key = [scope_key(raw), name] + return @named_field_cache[key] if @named_field_cache.key?(key) + + @named_field_cache[key] = raw.child_by_field_name(name) rescue StandardError nil end + def children(raw) + key = scope_key(raw) + return @children_cache[key] if @children_cache.key?(key) + + @children_cache[key] = Array(raw.children) + end + def named_children(raw) - Array(raw.named_children) + key = scope_key(raw) + return @named_children_cache[key] if @named_children_cache.key?(key) + + @named_children_cache[key] = Array(raw.named_children) end def child_token(raw, text) - raw.children.find { |child| !child.named? && child.text.to_s == text } + children(raw).find { |child| !child.named? 
&& child.text.to_s == text } end def first_child_kind(raw, kind) - raw.children.find { |child| child.kind == kind } + children(raw).find { |child| child.kind == kind } end def previous_named(raw) @@ -149,25 +172,19 @@ def local_name?(raw) end def scope_locals_for(raw) - locals = Set.new - @locals_by_scope.each do |(start_byte, end_byte, _kind), scope_locals| - locals.merge(scope_locals) if raw.start_byte >= start_byte && raw.end_byte <= end_byte - end - - node = raw - seen = Set.new - while node - key = scope_key(node) - break if seen.include?(key) - seen.add(key) - locals.merge(@locals_by_scope[key]) if @locals_by_scope.key?(key) - node = node.parent - end - locals + scope = @scope_for_node[scope_key(raw)] + scope ? @effective_locals_by_scope.fetch(scope, EMPTY_SET) : EMPTY_SET end private + def cached_node_class(raw) + key = scope_key(raw) + return @node_class_cache[key] if @node_class_cache.key?(key) + + @node_class_cache[key] = node_class(raw) + end + def node_class(raw) return nil if raw.kind == "comment" @@ -237,14 +254,14 @@ def identifier_class(raw) end def string_class(raw) - raw.named_children.any? { |child| child.kind == "interpolation" } ? InterpolatedStringNode : StringNode + named_children(raw).any? { |child| child.kind == "interpolation" } ? InterpolatedStringNode : StringNode end def assignment_class(raw) - lhs = named_field(raw, "left") || raw.named_children.first + lhs = named_field(raw, "left") || named_children(raw).first if lhs&.kind == "element_reference" - return IndexOrWriteNode if raw.kind == "operator_assignment" && raw.children.any? { |child| child.text.to_s == "||=" } - return IndexAndWriteNode if raw.kind == "operator_assignment" && raw.children.any? { |child| child.text.to_s == "&&=" } + return IndexOrWriteNode if raw.kind == "operator_assignment" && children(raw).any? { |child| child.text.to_s == "||=" } + return IndexAndWriteNode if raw.kind == "operator_assignment" && children(raw).any? 
{ |child| child.text.to_s == "&&=" } return IndexOperatorWriteNode if raw.kind == "operator_assignment" return CallNode @@ -263,7 +280,7 @@ def assignment_class(raw) end def binary_class(raw) - op = raw.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s + op = children(raw).find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s %w[|| or].include?(op) ? OrNode : CallNode end @@ -280,7 +297,7 @@ def body_statement_class(raw) def method_body_statement?(raw) return false unless raw.kind == "body_statement" - return true if raw.children.first&.kind == "def" + return true if children(raw).first&.kind == "def" modifier_def_container(raw) end @@ -288,15 +305,15 @@ def method_body_statement?(raw) def modifier_def_container(raw) return nil unless %w[body_statement call].include?(raw.kind) - raw.named_children.find { |child| child.kind == "argument_list" && child.children.first&.kind == "def" } + named_children(raw).find { |child| child.kind == "argument_list" && children(child).first&.kind == "def" } end def hidden_def_container(raw) - raw.children.first&.kind == "def" ? raw : modifier_def_container(raw) + children(raw).first&.kind == "def" ? raw : modifier_def_container(raw) end def hidden_method_definition?(raw) - return true if raw.kind == "body_statement" && raw.children.first&.kind == "def" + return true if raw.kind == "body_statement" && children(raw).first&.kind == "def" return true if modifier_def_container(raw) false @@ -304,28 +321,28 @@ def hidden_method_definition?(raw) def return_body_statement?(raw) return false unless %w[body_statement block_body then].include?(raw.kind) - return false unless raw.children.first&.kind == "return" - return false if %w[block_body then].include?(raw.kind) && raw.named_children.any? { |child| child.kind == "return" } + return false unless children(raw).first&.kind == "return" + return false if %w[block_body then].include?(raw.kind) && named_children(raw).any? 
{ |child| child.kind == "return" } true end def singleton_class_body_statement?(raw) raw.kind == "body_statement" && - raw.children.first&.kind == "class" && - !raw.children.first.named? && - raw.children.any? { |child| !child.named? && child.text.to_s == "<<" } + children(raw).first&.kind == "class" && + !children(raw).first.named? && + children(raw).any? { |child| !child.named? && child.text.to_s == "<<" } end def class_body_statement?(raw) raw.kind == "body_statement" && - raw.children.first&.kind == "class" && - !raw.children.first.named? && + children(raw).first&.kind == "class" && + !children(raw).first.named? && !singleton_class_body_statement?(raw) end def module_body_statement?(raw) - raw.kind == "body_statement" && raw.children.first&.kind == "module" && !raw.children.first.named? + raw.kind == "body_statement" && children(raw).first&.kind == "module" && !children(raw).first.named? end def control_body_statement?(raw) @@ -336,7 +353,7 @@ def control_body_statement_class(raw) return nil unless raw.kind == "body_statement" return nil if wrapped_control_statement_list?(raw) - case raw.children.first&.kind + case children(raw).first&.kind when "if" then IfNode when "unless" then UnlessNode when "while" then WhileNode @@ -349,19 +366,27 @@ def control_body_statement_class(raw) def wrapped_control_statement_list?(raw) return false unless raw.kind == "body_statement" - return false unless %w[if unless while until case begin].include?(raw.children.first&.kind) + return false unless %w[if unless while until case begin].include?(children(raw).first&.kind) - first_named = raw.named_children.first - %w[body_statement if unless while until case begin].include?(first_named&.kind) && raw.named_children.size > 1 + first_named = named_children(raw).first + %w[body_statement if unless while until case begin].include?(first_named&.kind) && named_children(raw).size > 1 end - def scan_scopes(raw) + def scan_scopes(raw, scope_stack) return unless raw + entered_scope = false if 
scope_boundary?(raw) - @locals_by_scope[scope_key(raw)] = collect_locals(raw) + key = scope_key(raw) + @locals_by_scope[key] = collect_locals(raw) + @scope_parent[key] = scope_stack.last + scope_stack.push(key) + entered_scope = true end - raw.children.each { |child| scan_scopes(child) } + @scope_for_node[scope_key(raw)] = scope_stack.last + children(raw).each { |child| scan_scopes(child, scope_stack) } + ensure + scope_stack.pop if entered_scope end def scope_boundary?(raw) @@ -379,13 +404,13 @@ def collect_locals(scope) return if node != scope && %w[method singleton_method class module singleton_class lambda].include?(node.kind) if %w[assignment operator_assignment].include?(node.kind) - lhs = named_field(node, "left") || node.named_children.first + lhs = named_field(node, "left") || named_children(node).first locals << lhs.text.to_s if lhs&.kind == "identifier" elsif node.kind == "rescue" variable = named_field(node, "variable") - variable&.named_children&.each { |child| locals << child.text.to_s if child.kind == "identifier" } + named_children(variable).each { |child| locals << child.text.to_s if child.kind == "identifier" } if variable end - node.children.each { |child| walk.call(child) } + children(node).each { |child| walk.call(child) } end walk.call(scope) locals @@ -396,19 +421,35 @@ def collect_param_names(scope, locals) if %w[method singleton_method].include?(scope.kind) named_field(scope, "parameters") elsif hidden_method_definition?(scope) - hidden_def_container(scope)&.named_children&.find { |child| child.kind == "method_parameters" } + container = hidden_def_container(scope) + container && named_children(container).find { |child| child.kind == "method_parameters" } elsif %w[block do_block].include?(scope.kind) named_field(scope, "parameters") - end + end return unless params - params.named_children.each do |param| + named_children(params).each do |param| name = named_field(param, "name") || - param.named_children.find { |child| child.kind == 
"identifier" } || + named_children(param).find { |child| child.kind == "identifier" } || (param.kind == "identifier" ? param : nil) locals << name.text.to_s if name end end + + def build_effective_locals! + @locals_by_scope.each_key { |scope| effective_locals_for(scope, Set.new) } + end + + def effective_locals_for(scope, seen) + return @effective_locals_by_scope[scope] if @effective_locals_by_scope.key?(scope) + return EMPTY_SET if scope.nil? || seen.include?(scope) + + seen.add(scope) + parent = @scope_parent[scope] + locals = parent ? effective_locals_for(parent, seen).dup : Set.new + locals.merge(@locals_by_scope.fetch(scope, EMPTY_SET)) + @effective_locals_by_scope[scope] = locals.freeze + end end class Node @@ -441,6 +482,18 @@ def full_name private + def children(node = raw) + return [] unless node + + context.children(node) + end + + def named_children(node = raw) + return [] unless node + + context.named_children(node) + end + def statement_node(node) return nil unless node if %w[body_statement block_body then].include?(node.kind) @@ -453,10 +506,10 @@ def statement_node(node) class ProgramNode < Node def statements - if raw.children.first&.text.to_s == "{" && raw.children.last&.text.to_s == "}" + if children.first&.text.to_s == "{" && children.last&.text.to_s == "}" return @statements ||= StatementsNode.synthetic(context, raw, [context.wrap(raw, force: HiddenHashNode)].compact) end - if raw.children.first&.text.to_s == "[" && raw.children.last&.text.to_s == "]" + if children.first&.text.to_s == "[" && children.last&.text.to_s == "]" return @statements ||= StatementsNode.synthetic(context, raw, [context.wrap(raw, force: HiddenArrayNode)].compact) end @@ -493,7 +546,8 @@ def self.synthetic(context, raw, children) end def initialize(context, raw, children = nil) - super(context, raw, children || statement_children(context, raw)) + super(context, raw, children || []) + @children = statement_children(context, raw) unless children end def body @@ -517,7 +571,7 
@@ def statement_children(context, raw) if expression_container?(raw) && expression_body_statement?(raw) if simple_child_expression?(raw) - return [context.wrap(raw.named_children.first)].compact + return [context.wrap(named_children(raw).first)].compact end return [context.wrap(raw, force: expression_class(context, raw))].compact @@ -531,26 +585,26 @@ def expression_container?(raw) end def hidden_def_statement?(raw) - raw.kind == "body_statement" && (raw.children.first&.kind == "def" || - raw.named_children.any? { |child| child.kind == "argument_list" && child.children.first&.kind == "def" }) + raw.kind == "body_statement" && (children(raw).first&.kind == "def" || + named_children(raw).any? { |child| child.kind == "argument_list" && children(child).first&.kind == "def" }) end def return_body_statement?(raw) return false unless %w[body_statement block_body then].include?(raw.kind) - return false unless raw.children.first&.kind == "return" - return false if %w[block_body then].include?(raw.kind) && raw.named_children.any? { |child| child.kind == "return" } + return false unless children(raw).first&.kind == "return" + return false if %w[block_body then].include?(raw.kind) && named_children(raw).any? 
{ |child| child.kind == "return" } true end def control_body_statement?(raw) raw.kind == "body_statement" && - %w[if unless while until case begin].include?(raw.children.first&.kind) && + %w[if unless while until case begin].include?(children(raw).first&.kind) && !wrapped_control_statement_list?(raw) end def control_body_statement_class(raw) - case raw.children.first&.kind + case children(raw).first&.kind when "if" then IfNode when "unless" then UnlessNode when "while" then WhileNode @@ -561,14 +615,14 @@ def control_body_statement_class(raw) end def wrapped_control_statement_list?(raw) - first_named = raw.named_children.first - %w[body_statement if unless while until case begin].include?(first_named&.kind) && raw.named_children.size > 1 + first_named = named_children(raw).first + %w[body_statement if unless while until case begin].include?(first_named&.kind) && named_children(raw).size > 1 end def expression_body_statement?(raw) - first = raw.children.first + first = children(raw).first return false if first && %w[def class module if unless while until case begin rescue ensure].include?(first.kind) - return false if raw.named_children.any? { |child| %w[method singleton_method class module].include?(child.kind) } + return false if named_children(raw).any? 
{ |child| %w[method singleton_method class module].include?(child.kind) } return false if top_level_statement_list?(raw) true @@ -576,42 +630,42 @@ def expression_body_statement?(raw) def top_level_statement_list?(raw) return false unless %w[body_statement block_body then].include?(raw.kind) - return false if raw.named_children.size <= 1 + return false if named_children(raw).size <= 1 return false if direct_token?(raw, ".") || direct_token?(raw, "&.") return false if direct_token?(raw, "[") || direct_token?(raw, "]") - return false if raw.children.first&.text.to_s == "[" - return false if raw.children.first&.text.to_s == "{" + return false if children(raw).first&.text.to_s == "[" + return false if children(raw).first&.text.to_s == "{" return false if direct_token?(raw, "=") return false if direct_token?(raw, "rescue") return false if direct_token?(raw, "?") && direct_token?(raw, ":") - return false if raw.children.any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\.\.\.?|\+|-|\*|\/|%|\|\||&&|or|and)\z/) } - return false if raw.named_children.any? { |child| %w[argument_list block do_block].include?(child.kind) } + return false if children(raw).any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\.\.\.?|\+|-|\*|\/|%|\|\||&&|or|and)\z/) } + return false if named_children(raw).any? { |child| %w[argument_list block do_block].include?(child.kind) } true end def simple_child_expression?(raw) - raw.named_children.size == 1 && - !raw.children.any? { |child| !child.named? && %w[. &. [ ] " ' ! not].include?(child.text.to_s) } && - !raw.children.any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\.\.\.?|\+|-|\*|\/|%|\|\||&&|or|and)\z/) } + named_children(raw).size == 1 && + !children(raw).any? { |child| !child.named? && %w[. &. [ ] " ' ! not].include?(child.text.to_s) } && + !children(raw).any? { |child| !child.named? 
&& child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\.\.\.?|\+|-|\*|\/|%|\|\||&&|or|and)\z/) } end def expression_class(parse_context, raw) - texts = raw.children.map { |child| child.text.to_s } - return HiddenArrayNode if raw.children.first&.text.to_s == "[" && texts.include?("]") - return HiddenHashNode if raw.children.first&.text.to_s == "{" && texts.include?("}") + texts = children(raw).map { |child| child.text.to_s } + return HiddenArrayNode if children(raw).first&.text.to_s == "[" && texts.include?("]") + return HiddenHashNode if children(raw).first&.text.to_s == "{" && texts.include?("}") if (assignment_class = hidden_assignment_class(raw)) return assignment_class end return RescueModifierNode if texts.include?("rescue") - return HiddenUnaryNode if %w[! not].include?(raw.children.first&.text.to_s) + return HiddenUnaryNode if %w[! not].include?(children(raw).first&.text.to_s) return HiddenElementReferenceNode if texts.include?("[") && texts.include?("]") if (texts & %w[|| or]).any? return HiddenOrNode end return RangeNode if (texts & %w[.. ...]).any? - return HiddenBinaryNode if raw.children.any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\+|-|\*|\/|%)\z/) } - return HiddenCallNode if texts.include?(".") || raw.children.any? { |child| %w[argument_list block do_block].include?(child.kind) } + return HiddenBinaryNode if children(raw).any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\+|-|\*|\/|%)\z/) } + return HiddenCallNode if texts.include?(".") || children(raw).any? { |child| %w[argument_list block do_block].include?(child.kind) } return StringNode if texts.first == "\"" || texts.first == "'" return SymbolNode if raw.text.to_s.start_with?(":") return IntegerNode if raw.text.to_s.match?(/\A\s*-?\d+\s*\z/) @@ -623,12 +677,12 @@ def expression_class(parse_context, raw) return parse_context.local_name?(raw) ? 
LocalVariableReadNode : CallNode end - child = raw.named_children.first + child = named_children(raw).first return ConstantReadNode if child&.kind == "constant" return InstanceVariableReadNode if child&.kind == "instance_variable" return ClassVariableReadNode if child&.kind == "class_variable" return GlobalVariableReadNode if child&.kind == "global_variable" - if raw.named_children.empty? + if named_children(raw).empty? text = raw.text.to_s.strip return ClassVariableReadNode if text.start_with?("@@") return InstanceVariableReadNode if text.start_with?("@") @@ -639,14 +693,14 @@ def expression_class(parse_context, raw) end def direct_token?(raw, text) - raw.children.any? { |child| !child.named? && child.text.to_s == text } + children(raw).any? { |child| !child.named? && child.text.to_s == text } end def hidden_assignment_class(raw) return false unless direct_token?(raw, "=") - return false if raw.children.any? { |child| !child.named? && %w[== != <= >= ===].include?(child.text.to_s) } + return false if children(raw).any? { |child| !child.named? && %w[== != <= >= ===].include?(child.text.to_s) } - lhs = raw.named_children.first + lhs = named_children(raw).first case lhs&.kind when "call", "element_reference" then HiddenSetterCallNode when "identifier" then LocalVariableWriteNode @@ -665,12 +719,12 @@ def identifier_like_text?(text) class ClassNode < Node def constant_path - context.wrap(context.named_field(raw, "name") || raw.named_children.first, force: ConstantReadNode) + context.wrap(context.named_field(raw, "name") || named_children.first, force: ConstantReadNode) end def body body_raw = context.named_field(raw, "body") || - (raw.kind == "body_statement" ? raw.named_children.find { |child| child.kind == "body_statement" } : nil) + (raw.kind == "body_statement" ? 
named_children.find { |child| child.kind == "body_statement" } : nil) return nil unless body_raw if hidden_class_or_module_statement?(body_raw) || hidden_singleton_class_statement?(body_raw) return StatementsNode.synthetic(context, body_raw, [context.wrap(body_raw)].compact) @@ -687,16 +741,16 @@ def child_nodes def hidden_class_or_module_statement?(node) node.kind == "body_statement" && - %w[class module].include?(node.children.first&.kind) && - !node.children.first.named? && - !node.children.any? { |child| !child.named? && child.text.to_s == "<<" } + %w[class module].include?(children(node).first&.kind) && + !children(node).first.named? && + !children(node).any? { |child| !child.named? && child.text.to_s == "<<" } end def hidden_singleton_class_statement?(node) node.kind == "body_statement" && - node.children.first&.kind == "class" && - !node.children.first.named? && - node.children.any? { |child| !child.named? && child.text.to_s == "<<" } + children(node).first&.kind == "class" && + !children(node).first.named? && + children(node).any? { |child| !child.named? && child.text.to_s == "<<" } end end @@ -704,11 +758,11 @@ class ModuleNode < ClassNode; end class SingletonClassNode < Node def expression - context.wrap(context.named_field(raw, "value") || raw.named_children.first) + context.wrap(context.named_field(raw, "value") || named_children.first) end def body - body_raw = context.named_field(raw, "body") || raw.named_children.last + body_raw = context.named_field(raw, "body") || named_children.last body_raw ? context.wrap(body_raw, force: StatementsNode) : nil end @@ -720,19 +774,19 @@ def child_nodes class DefNode < Node def name if hidden_body_statement_def? 
- hidden_def_container.named_children.find { |child| child.kind == "identifier" }&.text.to_s.to_sym + named_children(hidden_def_container).find { |child| child.kind == "identifier" }&.text.to_s.to_sym else - (context.named_field(raw, "name") || raw.named_children.find { |child| child.kind == "identifier" })&.text.to_s.to_sym + (context.named_field(raw, "name") || named_children.find { |child| child.kind == "identifier" })&.text.to_s.to_sym end end def receiver if hidden_body_statement_def? container = hidden_def_container - dot = container.children.index { |child| !child.named? && child.text.to_s == "." } + dot = children(container).index { |child| !child.named? && child.text.to_s == "." } return nil unless dot - return context.wrap(container.children[dot - 1]) if dot.positive? + return context.wrap(children(container)[dot - 1]) if dot.positive? end context.wrap(context.named_field(raw, "object")) @@ -740,7 +794,7 @@ def receiver def parameters node = if hidden_body_statement_def? - hidden_def_container.named_children.find { |child| child.kind == "method_parameters" } + named_children(hidden_def_container).find { |child| child.kind == "method_parameters" } else context.named_field(raw, "parameters") end @@ -749,19 +803,19 @@ def parameters def body body_raw = if hidden_body_statement_def? 
- hidden_def_container.named_children.reverse.find { |child| child.kind == "body_statement" || !%w[identifier method_parameters self].include?(child.kind) } + named_children(hidden_def_container).reverse.find { |child| child.kind == "body_statement" || !%w[identifier method_parameters self].include?(child.kind) } else - context.named_field(raw, "body") || raw.named_children.reverse.find { |child| child.kind != "method_parameters" && child.kind != "identifier" && child.kind != "self" } + context.named_field(raw, "body") || named_children.reverse.find { |child| child.kind != "method_parameters" && child.kind != "identifier" && child.kind != "self" } end return nil unless body_raw - return begin_body(body_raw) if body_raw.kind == "body_statement" && body_raw.named_children.any? { |child| %w[rescue ensure].include?(child.kind) } + return begin_body(body_raw) if body_raw.kind == "body_statement" && named_children(body_raw).any? { |child| %w[rescue ensure].include?(child.kind) } statement_node(body_raw) end def name_loc node = if hidden_body_statement_def? - hidden_def_container.named_children.find { |child| child.kind == "identifier" } + named_children(hidden_def_container).find { |child| child.kind == "identifier" } else context.named_field(raw, "name") end @@ -769,12 +823,12 @@ def name_loc end def rparen_loc - token = hidden_body_statement_def? ? hidden_def_container.children.find { |child| child.text.to_s == ")" } : raw.children.find { |child| child.text.to_s == ")" } + token = hidden_body_statement_def? ? children(hidden_def_container).find { |child| child.text.to_s == ")" } : children.find { |child| child.text.to_s == ")" } token && context.location(token) end def end_keyword_loc - token = hidden_body_statement_def? ? hidden_def_container.children.reverse.find { |child| child.text.to_s == "end" } : raw.children.reverse.find { |child| child.text.to_s == "end" } + token = hidden_body_statement_def? ? 
children(hidden_def_container).reverse.find { |child| child.text.to_s == "end" } : children.reverse.find { |child| child.text.to_s == "end" } token && context.location(token) end @@ -793,9 +847,9 @@ def hidden_body_statement_def? end def hidden_def_container - return raw if raw.children.first&.kind == "def" + return raw if children.first&.kind == "def" - raw.named_children.find { |child| child.kind == "argument_list" && child.children.first&.kind == "def" } + named_children.find { |child| child.kind == "argument_list" && children(child).first&.kind == "def" } end end @@ -810,7 +864,7 @@ def initialize(context, raw) @rest = nil @keyword_rest = nil @block = nil - raw.named_children.each do |child| + named_children.each do |child| param = ParameterNode.new(context, child) case child.kind when "optional_parameter" @@ -849,13 +903,13 @@ def child_nodes class ParameterNode < Node def name node = context.named_field(raw, "name") || - raw.named_children.find { |child| child.kind == "identifier" } || + named_children.find { |child| child.kind == "identifier" } || (raw.kind == "identifier" ? 
raw : nil) node&.text&.to_sym end def value - node = context.named_field(raw, "value") || raw.named_children.find { |child| child != context.named_field(raw, "name") && child.kind != "identifier" } + node = context.named_field(raw, "value") || named_children.find { |child| child != context.named_field(raw, "name") && child.kind != "identifier" } context.wrap(node) end end @@ -867,7 +921,7 @@ def parameters end def body - node = context.named_field(raw, "body") || raw.named_children.reject { |child| child.kind == "block_parameters" }.last + node = context.named_field(raw, "body") || named_children.reject { |child| child.kind == "block_parameters" }.last statement_node(node) end @@ -897,7 +951,7 @@ class ConstantPathNode < ConstantReadNode; end class WriteNode < VariableNode def target - context.named_field(raw, "left") || raw.named_children.first + context.named_field(raw, "left") || named_children.first end def name @@ -905,7 +959,7 @@ def name end def value - context.wrap(context.named_field(raw, "right") || context.named_field(raw, "value") || raw.named_children[1]) + context.wrap(context.named_field(raw, "right") || context.named_field(raw, "value") || named_children[1]) end def child_nodes @@ -921,19 +975,19 @@ class ConstantWriteNode < WriteNode; end class ConstantPathWriteNode < WriteNode def target - context.named_field(raw, "left") || raw.named_children.first + context.named_field(raw, "left") || named_children.first end end class CallNode < Node def receiver if raw.kind == "element_reference" - context.wrap(context.named_field(raw, "object") || raw.named_children.first) + context.wrap(context.named_field(raw, "object") || named_children.first) elsif %w[assignment operator_assignment].include?(raw.kind) - lhs = context.named_field(raw, "left") || raw.named_children.first - context.wrap(context.named_field(lhs, "object") || lhs&.named_children&.first) + lhs = context.named_field(raw, "left") || named_children.first + context.wrap(context.named_field(lhs, 
"object") || (lhs ? named_children(lhs).first : nil)) elsif raw.kind == "binary" - context.wrap(raw.named_children.first) + context.wrap(named_children.first) elsif raw.kind == "unary" nil elsif raw.kind == "argument_list" @@ -953,7 +1007,7 @@ def name elsif raw.kind == "binary" operator_token&.text.to_s.to_sym elsif raw.kind == "unary" - raw.children.find { |child| !child.named? }&.text.to_s.to_sym + children.find { |child| !child.named? }&.text.to_s.to_sym elsif raw.kind == "argument_list" text = raw.text.to_s.strip return text.to_sym if text.match?(/\A[a-z_]\w*[!?=]?\z/) @@ -962,7 +1016,7 @@ def name elsif raw.kind == "identifier" raw.text.to_s.to_sym else - node = context.named_field(raw, "method") || method_after_dot || raw.named_children.find { |child| child.kind == "identifier" } + node = context.named_field(raw, "method") || method_after_dot || named_children.find { |child| child.kind == "identifier" } return node.text.to_s.to_sym if node return raw.text.to_s.strip.to_sym if %w[body_statement block_body then].include?(raw.kind) && raw.text.to_s.strip.match?(/\A[a-z_]\w*[!?=]?\z/) @@ -973,16 +1027,16 @@ def name def arguments args = if raw.kind == "element_reference" - raw.named_children.drop(1) + named_children.drop(1) elsif %w[assignment operator_assignment].include?(raw.kind) - lhs = context.named_field(raw, "left") || raw.named_children.first - lhs_args = lhs ? lhs.named_children.drop(1) : [] - lhs_args + [context.named_field(raw, "right") || raw.named_children[1]].compact + lhs = context.named_field(raw, "left") || named_children.first + lhs_args = lhs ? 
named_children(lhs).drop(1) : [] + lhs_args + [context.named_field(raw, "right") || named_children[1]].compact elsif raw.kind == "binary" - [raw.named_children[1]].compact + [named_children[1]].compact else arg_raw = context.named_field(raw, "arguments") - arg_raw ||= raw.named_children.find { |child| child.kind == "argument_list" } + arg_raw ||= named_children.find { |child| child.kind == "argument_list" } return context.wrap(arg_raw, force: ArgumentsNode) if arg_raw [] @@ -991,12 +1045,12 @@ def arguments end def block - node = context.named_field(raw, "block") || raw.named_children.find { |child| %w[block do_block].include?(child.kind) } + node = context.named_field(raw, "block") || named_children.find { |child| %w[block do_block].include?(child.kind) } context.wrap(node, force: BlockNode) end def safe_navigation? - raw.children.any? { |child| child.text.to_s == "&." } + children.any? { |child| child.text.to_s == "&." } end def child_nodes @@ -1006,25 +1060,25 @@ def child_nodes private def operator_token - raw.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) } + children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) } end def dot_index - raw.children.index { |child| !child.named? && %w[. &.].include?(child.text.to_s) } + children.index { |child| !child.named? && %w[. &.].include?(child.text.to_s) } end def receiver_before_dot idx = dot_index return nil unless idx - raw.children[0...idx].reverse.find(&:named?) + children[0...idx].reverse.find(&:named?) end def method_after_dot idx = dot_index return nil unless idx - raw.children[(idx + 1)..].to_a.find { |child| child.named? && child.kind == "identifier" } + children[(idx + 1)..].to_a.find { |child| child.named? 
&& child.kind == "identifier" } end end @@ -1032,46 +1086,46 @@ class HiddenCallNode < CallNode def receiver return nil unless dot_index - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def name if (idx = dot_named_index) - raw.named_children[idx]&.text.to_s.to_sym + named_children[idx]&.text.to_s.to_sym else - raw.named_children.first&.text.to_s.to_sym + named_children.first&.text.to_s.to_sym end end def arguments - if (arg_raw = raw.named_children.find { |child| child.kind == "argument_list" }) + if (arg_raw = named_children.find { |child| child.kind == "argument_list" }) return context.wrap(arg_raw, force: ArgumentsNode) end start = dot_named_index ? dot_named_index + 1 : 1 - args = raw.named_children[start..].to_a.reject { |child| %w[block do_block].include?(child.kind) } + args = named_children[start..].to_a.reject { |child| %w[block do_block].include?(child.kind) } ArgumentsNode.synthetic(context, raw, args.filter_map { |child| context.wrap(child) }) end def block - node = raw.named_children.find { |child| %w[block do_block].include?(child.kind) } + node = named_children.find { |child| %w[block do_block].include?(child.kind) } context.wrap(node, force: BlockNode) end def safe_navigation? - raw.children.any? { |child| child.text.to_s == "&." } + children.any? { |child| child.text.to_s == "&." } end private def dot_index - @dot_index ||= raw.children.index { |child| !child.named? && %w[. &.].include?(child.text.to_s) } + @dot_index ||= children.index { |child| !child.named? && %w[. 
&.].include?(child.text.to_s) } end def dot_named_index return nil unless dot_index - raw.named_children.index { |child| child.start_byte > raw.children[dot_index].start_byte } + named_children.index { |child| child.start_byte > children[dot_index].start_byte } end end @@ -1081,17 +1135,17 @@ def receiver end def name - raw.children.first&.text.to_s.to_sym + children.first&.text.to_s.to_sym end def arguments - ArgumentsNode.synthetic(context, raw, raw.named_children.first(1).filter_map { |child| context.wrap(child) }) + ArgumentsNode.synthetic(context, raw, named_children.first(1).filter_map { |child| context.wrap(child) }) end end class HiddenElementReferenceNode < CallNode def receiver - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def name @@ -1099,7 +1153,7 @@ def name end def arguments - ArgumentsNode.synthetic(context, raw, raw.named_children.drop(1).filter_map { |child| context.wrap(child) }) + ArgumentsNode.synthetic(context, raw, named_children.drop(1).filter_map { |child| context.wrap(child) }) end end @@ -1117,10 +1171,10 @@ def name def arguments args = if lhs_element_reference? - lhs = context.wrap(raw.named_children.first, force: HiddenElementReferenceNode) - (lhs.arguments&.arguments || []) + [context.wrap(raw.named_children[1])].compact + lhs = context.wrap(named_children.first, force: HiddenElementReferenceNode) + (lhs.arguments&.arguments || []) + [context.wrap(named_children[1])].compact else - [context.wrap(raw.named_children[1])].compact + [context.wrap(named_children[1])].compact end ArgumentsNode.synthetic(context, raw, args) end @@ -1128,25 +1182,25 @@ def arguments private def lhs_element_reference? 
- raw.named_children.first&.kind == "element_reference" + named_children.first&.kind == "element_reference" end def lhs_call - @lhs_call ||= context.wrap(raw.named_children.first) + @lhs_call ||= context.wrap(named_children.first) end end class HiddenBinaryNode < CallNode def receiver - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def name - raw.children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s.to_sym + children.find { |child| !child.named? && !%w[( )].include?(child.text.to_s) }&.text.to_s.to_sym end def arguments - ArgumentsNode.synthetic(context, raw, [context.wrap(raw.named_children[1])].compact) + ArgumentsNode.synthetic(context, raw, [context.wrap(named_children[1])].compact) end end @@ -1156,7 +1210,8 @@ def self.synthetic(context, raw, children) end def initialize(context, raw, children = nil) - super(context, raw, children || build_arguments(context, raw)) + super(context, raw, children || []) + @children = build_arguments(context, raw) unless children end def arguments @@ -1167,7 +1222,7 @@ def arguments def build_arguments(context, raw) if quoted_argument_list?(raw) - klass = raw.named_children.any? { |child| child.kind == "interpolation" } ? InterpolatedStringNode : StringNode + klass = named_children(raw).any? { |child| child.kind == "interpolation" } ? 
InterpolatedStringNode : StringNode return [context.wrap(raw, force: klass)].compact end if (klass = scalar_expression_argument_list_class(context, raw)) @@ -1221,8 +1276,8 @@ def scalar_argument_class(context, raw) def quoted_argument_list?(raw) return false unless raw.kind == "argument_list" - first = raw.children.first&.text.to_s - last = raw.children.last&.text.to_s + first = children(raw).first&.text.to_s + last = children(raw).last&.text.to_s %w[" '].include?(first) && first == last end @@ -1230,7 +1285,7 @@ def scalar_expression_argument_list_class(context, raw) return nil unless raw.kind == "argument_list" return nil if parenthesized_argument_list?(raw) - texts = raw.children.map { |child| child.text.to_s } + texts = children(raw).map { |child| child.text.to_s } first = texts.first return HiddenArrayNode if first == "[" && texts.include?("]") return HiddenHashNode if first == "{" && texts.include?("}") @@ -1238,14 +1293,14 @@ def scalar_expression_argument_list_class(context, raw) return HiddenElementReferenceNode if texts.include?("[") && texts.include?("]") return HiddenOrNode if (texts & %w[|| or]).any? return RangeNode if (texts & %w[.. ...]).any? - return HiddenBinaryNode if raw.children.any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\+|-|\*|\/|%)\z/) } - return HiddenCallNode if texts.include?(".") || raw.named_children.any? { |child| %w[argument_list block do_block].include?(child.kind) } + return HiddenBinaryNode if children(raw).any? { |child| !child.named? && child.text.to_s.match?(/\A(?:==|!=|===|<=>|<=|>=|<<|>>|<|>|\+|-|\*|\/|%)\z/) } + return HiddenCallNode if texts.include?(".") || named_children(raw).any? 
{ |child| %w[argument_list block do_block].include?(child.kind) } scalar_argument_class(context, raw) end def parenthesized_argument_list?(raw) - raw.children.first&.text.to_s == "(" && raw.children.last&.text.to_s == ")" + children(raw).first&.text.to_s == "(" && children(raw).last&.text.to_s == ")" end end @@ -1285,11 +1340,17 @@ def elements class AssocNode < Node def key - context.wrap(context.named_field(raw, "key") || raw.named_children.first) + context.wrap(context.named_field(raw, "key") || named_children.first) end def value - context.wrap(context.named_field(raw, "value") || raw.named_children[1]) + value_raw = context.named_field(raw, "value") || named_children[1] + return context.wrap(value_raw) if value_raw + + key_raw = context.named_field(raw, "key") || named_children.first + if key_raw && context.local_name?(key_raw) + context.wrap(key_raw, force: LocalVariableReadNode) + end end def child_nodes @@ -1343,7 +1404,7 @@ class YieldNode < Node; end class ReturnNode < Node def arguments - args = raw.named_children.flat_map do |child| + args = named_children.flat_map do |child| node = context.wrap(child) node.is_a?(ArgumentsNode) ? 
node.arguments : [node].compact end @@ -1357,7 +1418,7 @@ def child_nodes class ParenthesesNode < Node def body - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def child_nodes @@ -1367,20 +1428,20 @@ def child_nodes class IfNode < Node def predicate - context.wrap(context.named_field(raw, "condition") || raw.named_children.first) + context.wrap(context.named_field(raw, "condition") || named_children.first) end def statements node = context.named_field(raw, "consequence") || - raw.named_children.find { |child| child.kind == "then" } || - raw.named_children[1] + named_children.find { |child| child.kind == "then" } || + named_children[1] statement_node(node) end def subsequent node = context.named_field(raw, "alternative") || - raw.named_children.find { |child| %w[else elsif].include?(child.kind) } || - raw.named_children[2] + named_children.find { |child| %w[else elsif].include?(child.kind) } || + named_children[2] context.wrap(node) end @@ -1391,20 +1452,20 @@ def child_nodes class UnlessNode < Node def predicate - context.wrap(context.named_field(raw, "condition") || raw.named_children.first) + context.wrap(context.named_field(raw, "condition") || named_children.first) end def statements node = context.named_field(raw, "consequence") || - raw.named_children.find { |child| child.kind == "then" } || - raw.named_children[1] + named_children.find { |child| child.kind == "then" } || + named_children[1] statement_node(node) end def subsequent node = context.named_field(raw, "alternative") || - raw.named_children.find { |child| %w[else elsif].include?(child.kind) } || - raw.named_children[2] + named_children.find { |child| %w[else elsif].include?(child.kind) } || + named_children[2] context.wrap(node) end @@ -1419,13 +1480,13 @@ def child_nodes class WhileNode < Node def predicate - context.wrap(context.named_field(raw, "condition") || raw.named_children.first) + context.wrap(context.named_field(raw, "condition") || named_children.first) 
end def statements node = context.named_field(raw, "body") || - raw.named_children.find { |child| child.kind == "then" } || - raw.named_children[1] + named_children.find { |child| child.kind == "then" } || + named_children[1] statement_node(node) end @@ -1442,15 +1503,15 @@ class UntilNode < WhileNode; end class CaseNode < Node def predicate - context.wrap(context.named_field(raw, "value") || raw.named_children.first) + context.wrap(context.named_field(raw, "value") || named_children.first) end def conditions - raw.named_children.select { |child| child.kind == "when" }.filter_map { |child| context.wrap(child, force: WhenNode) } + named_children.select { |child| child.kind == "when" }.filter_map { |child| context.wrap(child, force: WhenNode) } end def else_clause - node = raw.named_children.find { |child| child.kind == "else" } + node = named_children.find { |child| child.kind == "else" } context.wrap(node, force: ElseNode) end @@ -1461,12 +1522,12 @@ def child_nodes class WhenNode < Node def conditions - nodes = raw.named_children.take_while { |child| child.kind != "then" } + nodes = named_children.take_while { |child| child.kind != "then" } nodes.filter_map { |child| context.wrap(child) } end def statements - body = context.named_field(raw, "body") || raw.named_children.find { |child| child.kind == "then" } + body = context.named_field(raw, "body") || named_children.find { |child| child.kind == "then" } statement_node(body) end @@ -1488,22 +1549,22 @@ def child_nodes class BeginNode < Node def statements - body_children = raw.named_children.reject { |child| %w[rescue ensure else].include?(child.kind) } + body_children = named_children.reject { |child| %w[rescue ensure else].include?(child.kind) } StatementsNode.synthetic(context, raw, body_children.filter_map { |child| context.wrap(child) }) end def rescue_clause - node = raw.named_children.find { |child| child.kind == "rescue" } + node = named_children.find { |child| child.kind == "rescue" } context.wrap(node, 
force: RescueNode) end def else_clause - node = raw.named_children.find { |child| child.kind == "else" } + node = named_children.find { |child| child.kind == "else" } context.wrap(node, force: ElseNode) end def ensure_clause - node = raw.named_children.find { |child| child.kind == "ensure" } + node = named_children.find { |child| child.kind == "ensure" } context.wrap(node, force: EnsureNode) end @@ -1514,21 +1575,21 @@ def child_nodes class RescueNode < Node def exceptions - node = context.named_field(raw, "exceptions") || raw.named_children.find { |child| child.kind == "exceptions" } - node ? node.named_children.filter_map { |child| context.wrap(child) } : [] + node = context.named_field(raw, "exceptions") || named_children.find { |child| child.kind == "exceptions" } + node ? named_children(node).filter_map { |child| context.wrap(child) } : [] end def statements - body = context.named_field(raw, "body") || raw.named_children.find { |child| child.kind == "then" } + body = context.named_field(raw, "body") || named_children.find { |child| child.kind == "then" } statement_node(body) end def subsequent - body = context.named_field(raw, "body") || raw.named_children.find { |child| child.kind == "then" } + body = context.named_field(raw, "body") || named_children.find { |child| child.kind == "then" } return nil unless body seen_body = false - node = raw.named_children.find do |child| + node = named_children.find do |child| if seen_body && child.kind == "rescue" true else @@ -1566,11 +1627,11 @@ def child_nodes class RescueModifierNode < Node def expression - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def rescue_expression - context.wrap(raw.named_children[1]) + context.wrap(named_children[1]) end def child_nodes @@ -1591,11 +1652,11 @@ class IndexOrWriteNode < IndexOperatorWriteNode; end class OrNode < Node def left - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def right - 
context.wrap(raw.named_children[1]) + context.wrap(named_children[1]) end def child_nodes @@ -1605,11 +1666,11 @@ def child_nodes class HiddenOrNode < OrNode def left - context.wrap(raw.named_children.first) + context.wrap(named_children.first) end def right - context.wrap(raw.named_children[1]) + context.wrap(named_children[1]) end end end diff --git a/gems/nil-kill/spec/source_index_spec.rb b/gems/nil-kill/spec/source_index_spec.rb index d11c942f6..ea07ad546 100644 --- a/gems/nil-kill/spec/source_index_spec.rb +++ b/gems/nil-kill/spec/source_index_spec.rb @@ -1021,6 +1021,33 @@ def call(items, map, names) end end + it "records shorthand keyword arguments as local param origins" do + Dir.mktmpdir("nil-kill-shorthand-keyword-param-origin") do |dir| + path = File.join(dir, "shorthand_keyword_param_origin.rb") + File.write(path, <<~RUBY) + class ShorthandKeywordParamOrigin + extend T::Sig + + sig { params(value: T.untyped).void } + def sink(value:); end + + sig { params(value: String).void } + def call(value) + sink(value:) + end + end + RUBY + + idx = described_class.new(path) + origin = idx.param_origins.find { |entry| entry["callee"] == "sink" && entry["slot"] == "value" } + + expect(origin).to include( + "origin_kind" => "local", + "code" => "value" + ) + end + end + it "infers static return origins for Ruby iterator and collection mutation calls" do Dir.mktmpdir("nil-kill-iterator-return-origin") do |dir| path = File.join(dir, "iterator_return_origin.rb") From cdec71da7830727a522e21b9b55b7a4ed97de2ae Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 02:18:55 +0000 Subject: [PATCH 2/3] Fix nil-kill static analysis architecture --- .../multi-language-static-analysis-fix.md | 281 +++++ .../lib/nil_kill/decomplex_static_facts.rb | 1122 +++++++++++++++++ .../lib/nil_kill/languages/provider.rb | 409 +----- .../nil_kill/languages/providers/python.rb | 249 +--- .../lib/nil_kill/languages/providers/ruby.rb | 345 +---- .../languages/providers/ruby/sorbet.rb 
| 59 - .../languages/providers/typescript.rb | 204 +-- gems/nil-kill/lib/nil_kill/static_evidence.rb | 35 +- .../spec/alias_recommendations_spec.rb | 25 +- .../spec/multi_language_runtime_spec.rb | 36 +- 10 files changed, 1463 insertions(+), 1302 deletions(-) create mode 100644 gems/nil-kill/docs/agents/multi-language-static-analysis-fix.md create mode 100644 gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb diff --git a/gems/nil-kill/docs/agents/multi-language-static-analysis-fix.md b/gems/nil-kill/docs/agents/multi-language-static-analysis-fix.md new file mode 100644 index 000000000..7dd703c9d --- /dev/null +++ b/gems/nil-kill/docs/agents/multi-language-static-analysis-fix.md @@ -0,0 +1,281 @@ +# Multi-Language Static Analysis Fix + +## Problem + +Nil-kill currently has a misleading static-analysis boundary. `StaticEvidence` +looks language-neutral, but the Ruby provider previously called `SourceIndex` +from inside `provider.static_evidence`. `SourceIndex` is Ruby/Sorbet-specific: +it parses Ruby through Nil-kill's Ruby syntax facade, knows about RBI/Sorbet +facts, and contains inference-oriented collectors that are not portable. + +That means the current "multi-language provider" shape overclaims support. The +portable part is structural evidence collection. The Ruby-only part is +inference, checker/RBI integration, and rewrite planning. These must be modeled +as separate capabilities. + +## Decision + +`StaticEvidence` must not call `SourceIndex`, directly or indirectly. The static +analysis path should consume Tree-sitter facts from Decomplex and Nil-kill +language adapters. Ruby inference may continue to use `SourceIndex` until it is +renamed/split, but that path is not the shared static-analysis path. + +Nil-kill-specific static analysis should be implemented by extending +Decomplex's Tree-sitter normalization/adapters and consuming the normalized +facts from Nil-kill. It must not smuggle Ruby-specific behavior through +`Language::Provider`. 
+ +## What Static Analysis Needs + +Static analysis should emit only facts that are useful without a language +checker or standard-library type model: + +- `files`: path, language, parser, line count +- `methods`: owner, name, kind, path, line/span, params, syntactic signature +- `fields`: owner, name, path, line/span, declared type when syntactically present +- Decomplex input facts: `state_reads` and `state_writes` +- `state_types`: declared state/field type by `owner\0field` +- `state_protocols`: method names called on known state fields +- `state_param_origins`: constructor/function params assigned into state fields +- `type_definitions`: syntactic method signatures, state fields, type aliases, + included modules or equivalent composition facts +- `hash_shapes`: literal object/hash/dict/map keys and conservative literal + value types +- `array_shapes`: literal/list/array element types, size, homogeneous flag, and + tuple-like positional types +- `alias_recommendations`: derived from `type_definitions` +- `language_capabilities`: what the adapter can actually emit + +These are report/evidence facts. They are not proof of inferred types. + +Decomplex already provides `state_reads` and `state_writes`; Nil-kill should not +reimplement those collectors. Nil-kill's static layer should consume those facts +and derive Nil-kill-specific records such as fields, protocols, param origins, +slot coverage, and pressure reports. 
+ +## What Decomplex Already Has + +Decomplex is already the right owner for language-neutral syntax facts: + +- language detection and Tree-sitter parser selection +- parsed `Document` with source, lines, root node, language, and adapter +- `structural_facts(document)` with: + - `function_defs` + - `owner_defs` + - `call_sites` + - `state_declarations` + - `state_param_origins` + - `state_reads` + - `state_writes` +- branch/decision facts used by other Decomplex detectors +- a normalized AST path through `Decomplex::Ast::TreeSitterNormalizer` +- language-specific normalization adapters for generic syntax shape +- an O(1) Tree-sitter facade/cache pattern that avoids repeated parent/child + lookups + +Nil-kill should consume those facts first. If a fact is broadly useful to +Decomplex detectors or to multi-language static analysis, add it to Decomplex's +Syntax/adapter/normalizer layer. Nil-kill should only keep final evidence +assembly and Nil-kill-specific scoring/reporting. + +## New Facts Nil-Kill Needs + +Nil-kill still needs facts that Decomplex either does not emit yet or does not +emit in the exact evidence schema: + +- `array_shapes`: array/list literal size, positional element types, + homogeneous flag, source location, and code +- language-specific type definitions: + - Ruby `sig` method signatures + - Ruby `T::Struct` fields + - Ruby `Struct.new` fields + - Ruby `include` composition facts + - Python `.pyi` signatures and fields + - TypeScript interface/type alias/class field declarations +- corrected owner scopes for Ruby `Struct.new(...) do ... def ... end` +- protocol derivations from known state fields when Decomplex call-site output + is incomplete, such as Ruby ivar member calls around Sorbet `sig` wrappers +- capability metadata that distinguishes "annotation parsing" from real + checker-backed type indexing + +These are static evidence facts. They should not imply inferred receiver types +or safe rewrites. 
+ +## What Static Analysis Must Not Own + +The portable static path must not produce or depend on: + +- RBI parsing or Sorbet return/field indexes +- Ruby stdlib/gem return inference +- receiver method return typing +- return-origin inference used for signature rewrites +- deterministic nil guard rewrite actions +- collection lookup provenance that depends on expression typing +- confidence gates based on "this will typecheck" +- autofix actions + +Those belong to a provider-specific inference/repair backend. Today that backend +is Ruby/Sorbet. + +## Architecture + +Use four explicit layers: + +1. `Decomplex::Syntax` + Parses files with Tree-sitter and exposes raw documents plus generic + structural helpers. + +2. `Decomplex::Ast::TreeSitterNormalizer` and Decomplex adapters + Normalize language-specific Tree-sitter node shapes into common syntax facts. + Extend this layer for multi-language facts Nil-kill needs: array shapes, + owner scopes, state access protocol calls, annotations, and declaration + shapes. + +3. `NilKill::StaticAnalysis` + Consumes normalized Decomplex facts and assembles the Nil-kill evidence + schema. It should not parse language syntax itself. + +4. Provider-specific semantic backends + Optional. Ruby/Sorbet, TypeScript compiler, pyright/mypy, Psalm/PHPStan, + LuaLS, etc. These are not required for static evidence and must not be + represented as available until wired to a real backend. 
+ +`Language::Provider` should be narrowed to capability metadata or split into +explicit capabilities: + +- `syntax_adapter` +- `static_fact_extractor` +- `runtime_tracer` +- `type_backend` +- `rewrite_backend` + +## Tree-Sitter Access Pattern + +The Decomplex Syntax/Normalizer layer must preserve the O(1) Tree-sitter facade +work: + +- wrap raw nodes once per document using stable node ids/byte ranges +- cache parent, children, named children, named fields, text slice, kind, span +- never call `node.parent` repeatedly while walking +- maintain an explicit stack during DFS for owner/function/control context +- perform one primary DFS per file, with small post-processing indexes +- use adapter predicates that receive cached node wrappers, not raw nodes +- avoid source-line regex scans for facts that can be derived from nodes + +Allowed post-processing: + +- de-duplicate records by stable keys +- build `known_state_fields` by owner +- derive `state_protocols` from call sites plus known state fields +- derive alias recommendations from `type_definitions` +- derive slot/collection pressure from hash and array literal shapes + +## Adapter Interface + +Each Decomplex language adapter should implement the smallest useful surface: + +```ruby +module Decomplex + class Adapter + def language; end + def self_receiver_names; end + + def function_definition?(node); end + def function_name(node); end + def function_kind(node); end + def function_params(node); end + def function_signature(node); end + + def owner_definition?(node); end + def owner_name(node, stack); end + def owner_kind(node); end + + def call_target(node, stack); end + def state_read_target(node, stack); end + def state_write_target(node, stack); end + def state_declaration(node, stack); end + def state_param_origin(write, stack); end + + def hash_literal?(node); end + def hash_pairs(node); end + def array_literal?(node); end + def array_elements(node); end + def literal_type(node); end + + def 
annotation_definitions(node, stack); end + end +end +``` + +Adapters should expose normalized facts through Decomplex `Syntax` and +`TreeSitterNormalizer`. Nil-kill should consume those facts; it should not add +language parsers or regex-based syntax extraction in providers. + +## Language-Specific Scope + +Initial adapters should be honest about what they support: + +- Ruby: syntactic Sorbet signatures, `T::Struct` fields, `Struct.new` fields, + includes, ivar assignment/origins/protocols, hash and array shapes. +- Python: functions/methods, class fields from annotations, `self.x` + assignments, `.pyi` declarations, `typing` aliases, dict/list shapes. +- TypeScript: functions/methods, class/interface fields, `this.x`, declared + params/returns, type aliases/interfaces, object/array shapes. +- Lua/PHP/Perl: start with functions, owner scopes where available, state + assignment conventions, and table/hash/array-like shapes. Do not claim type indexing + until real annotation/type backends exist. + +## Migration Plan + +1. Keep `StaticEvidence` as the public command output, but make it consume + Decomplex normalized facts only. +2. Extend Decomplex `TreeSitterNormalizer`, `Syntax`, and language adapters for + array shapes, annotation definitions, owner scopes, and protocol calls. +3. Port the Ruby-only static facts currently recovered from `SourceIndex` into + Decomplex Tree-sitter collectors: method signatures, `T::Struct` fields, + `Struct.new` fields, includes. +4. Replace line-regex Ruby extractors with Tree-sitter adapter collectors in + Decomplex. +5. Add per-language fixture tests that assert exact evidence records and + explicitly assert that `StaticEvidence` does not instantiate `SourceIndex`. +6. Change capability metadata so `type_indexing` means a real semantic backend, + not "we parsed annotations." +7. Rename or split `SourceIndex` to `RubySourceIndex` for the inference path. 
+ +## Correctness Rules + +- A static record must identify its source as `syntax`, `annotation`, + `runtime`, or provider backend. +- A language must not advertise runtime tracing, type indexing, or rewrites + unless that capability has an implementation and tests. +- Unknown receiver calls are protocol pressure, not inferred receiver types. +- Syntactic annotations are declared types, not inferred types. +- Ruby/Sorbet facts may enrich Ruby reports, but only through explicit Ruby + static or inference backends. + +## Immediate Follow-Up + +After removing `SourceIndex` from `StaticEvidence`, the next implementation +step is to add `NilKill::StaticAnalysis::TreeSitterExtractor` and move the +current `Provider#static_evidence` logic into it. Providers should then become +adapters/capability descriptors rather than hidden extraction engines. + +## Extension Plan + +The multi-language path should extend Tree-sitter support in the following +order: + +1. Extend `Decomplex::Syntax` structs for generic facts. `state_reads` and + `state_writes` already live there and should remain the source of truth. +2. Extend Decomplex adapters for language-specific node-shape recognition: + arrays/lists, object/hash literals, owner scopes, annotations, protocol call + targets, and state declaration forms. +3. Extend `Decomplex::Ast::TreeSitterNormalizer` to normalize those syntax + shapes using the O(1) node facade/cache. Normalizer output should stay + language-neutral. +4. Keep Nil-kill static assembly schema-oriented: merge Decomplex facts, + de-duplicate records, compute summaries, and run Nil-kill pressure analysis. +5. Add adapters incrementally with exact fixture tests over emitted facts. + +The Decomplex walk should run once per file, thread scope in the walk stack, and +avoid hidden fallbacks to Ruby `SourceIndex`. 
diff --git a/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb b/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb new file mode 100644 index 000000000..3b6f61895 --- /dev/null +++ b/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb @@ -0,0 +1,1122 @@ +# typed: false +# frozen_string_literal: true + +require "set" +require "pathname" + +module Decomplex + module NilKillStaticFacts + module_function + + def build(document, structural_facts, root: NilKill::ROOT) + Builder.new(document, structural_facts, root: root).build + end + + class Builder + def initialize(document, structural_facts, root: NilKill::ROOT) + @document = document + @facts = structural_facts + @language = document.language.to_s + @root = root + end + + def build + state_declarations = normalized_state_declarations + known_states = declared_states_by_owner(state_declarations) + + { + methods: methods, + fields: fields(state_declarations, known_states), + state_types: state_types(state_declarations), + state_protocols: state_protocols(known_states), + state_param_origins: state_param_origins(known_states), + signatures: signatures, + type_definitions: type_definitions(state_declarations), + hash_shapes: literal_shapes(:hash), + array_shapes: literal_shapes(:array), + } + end + + private + + def methods + Array(@facts[:function_defs]).map do |fn| + signature = method_signature(fn) + owner = method_owner(fn) + { + "key" => [owner, fn.name.to_s, fn.kind.to_s], + "owner" => owner, + "name" => fn.name.to_s, + "kind" => fn.kind.to_s, + "path" => rel(fn.file), + "line" => fn.line, + "span" => fn.span, + "language" => @language, + "signature" => signature, + "params" => Array(fn.params).map(&:to_s), + "source" => method_source(signature), + } + end + end + + def fields(state_declarations, known_states) + out = [] + seen = Set.new + state_declarations.each do |state| + field = declared_state_field(state.field) + out << field_record(state, field, "state_declaration") + seen.add(state_key(state.owner, 
field)) + end + + Array(@facts[:state_writes]).each do |write| + next unless owned_state?(write, known_states[write.owner.to_s]) + + field = canonical_state_field(write.field, receiver: write.receiver) + key = state_key(write.owner, field) + next if seen.include?(key) + + out << field_record(write, field, "state_write") + seen.add(key) + end + out + end + + def field_record(state, field, origin) + { + "id" => [@language, rel(state.file), state.owner, "field", field].map(&:to_s).join("\u0000"), + "language" => @language, + "path" => rel(state.file), + "owner" => state.owner.to_s, + "name" => field.to_s, + "line" => state.line, + "span" => state.span, + "declared_type" => state.respond_to?(:type) && !state.type.to_s.empty? ? state.type.to_s : nil, + "static_origin" => origin, + "source" => "syntax", + } + end + + def state_types(state_declarations) + state_declarations.each_with_object({}) do |state, out| + type = state.type.to_s + next if type.empty? + + out[state_key(state.owner, declared_state_field(state.field))] = type + end + end + + def state_protocols(known_states) + out = Hash.new { |hash, key| hash[key] = Set.new } + Array(@facts[:call_sites]).each do |call| + state = receiver_state_field(call.receiver, known_states[call.owner.to_s]) + next unless state + + out[state_key(call.owner, state)].add(call.message.to_s) + end + stringify_set_map(out) + end + + def state_param_origins(known_states) + out = Hash.new { |hash, key| hash[key] = Set.new } + Array(@facts[:state_param_origins]).each do |origin| + next unless owned_state?(origin, known_states[origin.owner.to_s]) + next if self_receiver_names.include?(origin.param.to_s) + + field = canonical_state_field(origin.field, receiver: origin.receiver) + out[state_key(origin.owner, field)].add(origin.param.to_s) + end + stringify_set_map(out) + end + + def signatures + Array(@facts[:function_defs]).each_with_object({}) do |fn, out| + signature = method_signature(fn) + out[[method_owner(fn), 
fn.name.to_s].join("\u0000")] = signature unless signature.empty? + end + end + + def type_definitions(state_declarations) + definitions = [] + Array(@facts[:function_defs]).each do |fn| + definition = method_type_definition(fn) + definitions << definition if definition + end + state_declarations.each do |state| + definition = state_field_type_definition(state) + definitions << definition if definition + end + definitions.concat(ruby_struct_new_type_definitions) + definitions.concat(ruby_include_type_definitions) + definitions.concat(ruby_type_alias_definitions) + definitions.concat(python_stub_type_definitions) + definitions.concat(python_type_alias_definitions) + definitions.concat(typescript_interface_type_definitions) + definitions.concat(typescript_type_alias_definitions) + definitions + end + + def literal_shapes(kind) + shapes = [] + walk_tree(@document.root) do |node| + shape = kind == :hash ? hash_shape(node) : array_shape(node) + shapes << shape if shape + end + shapes.uniq { |shape| [shape["path"], shape["line"], shape["code"]] } + end + + def normalized_state_declarations + declarations = Array(@facts[:state_declarations]).dup + declarations.concat(extra_typed_state_declarations) + declarations.uniq { |state| [state.file, state.owner, declared_state_field(state.field), state.line, state.type] } + end + + def extra_typed_state_declarations + out = [] + walk_tree(@document.root) do |node| + next unless %w[assignment assignment_expression assignment_statement].include?(node.kind.to_s) + + lhs = named_child(node, "left") || node.named_children.first + target = state_target(lhs) + next unless target + + type = declared_type_text(node, lhs) + next if type.to_s.empty? 
+ + out << Decomplex::Syntax::StateDeclaration.new( + field: target.fetch(:field), + owner: owner_for_line(node_line(node)), + type: type, + file: @document.file, + line: node_line(node), + span: node_span(node) + ) + end + out.concat(ruby_t_struct_state_declarations) + out + end + + def declared_states_by_owner(state_declarations) + state_declarations.each_with_object(Hash.new { |hash, key| hash[key] = Set.new }) do |state, out| + out[state.owner.to_s].add(declared_state_field(state.field)) + end + end + + def method_signature(fn) + signature = fn.signature.to_s + return signature if @language != "ruby" + + signature.strip.start_with?("sig ") ? signature : "" + end + + def method_source(signature) + return {} if signature.to_s.empty? + return { "sig" => signature, "signature" => signature, "type_system" => "sorbet", "source" => "annotation" } if @language == "ruby" + + { "signature" => signature, "source" => "syntax" } + end + + def method_type_definition(fn) + case @language + when "ruby" + ruby_method_type_definition(fn) + when "python" + python_method_type_definition(fn) + when "typescript" + typescript_method_type_definition(fn) + end + end + + def ruby_method_type_definition(fn) + signature = method_signature(fn) + return nil if signature.empty? + + owner = method_owner(fn) + { + "id" => ["ruby", rel(fn.file), owner, "method_signature", fn.name, fn.line, "sorbet"].map(&:to_s).join("\u0000"), + "language" => "ruby", + "type_system" => "sorbet", + "kind" => "method_signature", + "path" => rel(fn.file), + "owner" => owner, + "name" => fn.name.to_s, + "line" => fn.line, + "signature" => signature, + "return_type" => NilKill.extract_return_type(signature), + "params" => NilKill.extract_param_entries(signature).map { |name, type| { "name" => name, "type" => type } }, + } + end + + def python_method_type_definition(fn) + typed = python_signature_types(fn.signature) + return nil if typed[:params].empty? && typed[:return_type].to_s.empty? 
+ + { + "id" => ["python", rel(fn.file), fn.owner, "method_signature", fn.name, fn.line, "python-typing"].map(&:to_s).join("\u0000"), + "language" => "python", + "type_system" => "python-typing", + "kind" => "method_signature", + "path" => rel(fn.file), + "owner" => fn.owner.to_s, + "name" => fn.name.to_s, + "line" => fn.line, + "signature" => fn.signature.to_s, + "return_type" => typed[:return_type], + "params" => typed[:params], + } + end + + def typescript_method_type_definition(fn) + typed = typescript_signature_types(fn.signature) + return nil if typed[:params].empty? && typed[:return_type].to_s.empty? + + { + "id" => ["typescript", rel(fn.file), fn.owner, "method_signature", fn.name, fn.line, "typescript"].map(&:to_s).join("\u0000"), + "language" => "typescript", + "type_system" => "typescript", + "kind" => "method_signature", + "path" => rel(fn.file), + "owner" => fn.owner.to_s, + "name" => fn.name.to_s, + "line" => fn.line, + "signature" => fn.signature.to_s, + "return_type" => typed[:return_type], + "params" => typed[:params], + } + end + + def state_field_type_definition(state) + type = state.type.to_s + system = annotation_type_system + return nil if type.empty? || system.empty? 
+ + field = declared_state_field(state.field) + { + "id" => [@language, rel(state.file), state.owner, "state_field", field, state.line, system].map(&:to_s).join("\u0000"), + "language" => @language, + "type_system" => system, + "kind" => "state_field", + "path" => rel(state.file), + "owner" => state.owner.to_s, + "name" => field, + "line" => state.line, + "declared_type" => type, + } + end + + def annotation_type_system + case @language + when "ruby" then "sorbet" + when "python" then "python-typing" + when "typescript" then "typescript" + else "" + end + end + + def ruby_struct_new_type_definitions + return [] unless @language == "ruby" + + ruby_struct_definitions.flat_map do |struct| + struct.fetch(:fields).map do |name| + { + "id" => ["ruby", rel(@document.file), struct.fetch(:owner), "state_field", name, struct.fetch(:line), "ruby-struct"].map(&:to_s).join("\u0000"), + "language" => "ruby", + "type_system" => "ruby-struct", + "kind" => "state_field", + "path" => rel(@document.file), + "owner" => struct.fetch(:owner), + "name" => name, + "line" => struct.fetch(:line), + "declared_type" => nil, + } + end + end + end + + def ruby_include_type_definitions + return [] unless @language == "ruby" + + out = [] + walk_tree(@document.root) do |node| + match = node_text(node).match(/\Ainclude\s+([A-Z]\w*(?:::[A-Z]\w*)*)\b/) + next unless match + + owner = owner_for_line(node_line(node), include_struct: true).to_s + next if owner.empty? 
+ + included_name = resolved_include_name(owner, match[1]) + out << { + "id" => ["ruby", rel(@document.file), owner, "included_module", included_name, node_line(node), "ruby-include"].map(&:to_s).join("\u0000"), + "language" => "ruby", + "type_system" => "ruby-include", + "kind" => "included_module", + "path" => rel(@document.file), + "owner" => owner, + "name" => included_name, + "line" => node_line(node), + } + end + out + end + + def ruby_type_alias_definitions + return [] unless @language == "ruby" + + type_alias_definitions("ruby", "sorbet", /\A([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*(.+)\s*\}\s*(?:#.*)?\z/) + end + + def python_stub_type_definitions + return [] unless @language == "python" && File.extname(@document.file).downcase == ".pyi" + + definitions = [] + owner = nil + owner_indent = nil + @document.lines.each_with_index do |line, idx| + line_no = idx + 1 + stripped = line.strip + next if stripped.empty? || stripped.start_with?("#") + + indent = line[/\A\s*/].to_s.length + if owner && indent <= owner_indent.to_i && !stripped.start_with?("def ") + owner = nil + owner_indent = nil + end + + if (match = stripped.match(/\Aclass\s+([A-Za-z_]\w*)\b/)) + owner = match[1] + owner_indent = indent + next + end + + if (match = stripped.match(/\A(?:async\s+)?def\s+([A-Za-z_]\w*)\s*\((.*)\)\s*(?:->\s*([^:]+))?:/)) + signature = stripped.sub(/\s*\.\.\.\s*\z/, "") + typed = python_signature_types(signature) + definitions << { + "id" => ["python", rel(@document.file), owner, "method_signature", match[1], line_no, "python-typing-stub"].map(&:to_s).join("\u0000"), + "language" => "python", + "type_system" => "python-typing", + "kind" => "method_signature", + "path" => rel(@document.file), + "owner" => owner.to_s, + "name" => match[1], + "line" => line_no, + "signature" => signature, + "return_type" => typed[:return_type], + "params" => typed[:params], + } + elsif owner && (match = stripped.match(/\A([A-Za-z_]\w*)\s*:\s*([^=#]+)(?:\s*=.*)?\z/)) + definitions << { + "id" => 
["python", rel(@document.file), owner, "state_field", match[1], line_no, "python-typing-stub"].map(&:to_s).join("\u0000"), + "language" => "python", + "type_system" => "python-typing", + "kind" => "state_field", + "path" => rel(@document.file), + "owner" => owner.to_s, + "name" => match[1], + "line" => line_no, + "declared_type" => match[2].strip, + } + end + end + definitions + end + + def python_type_alias_definitions + return [] unless @language == "python" + + @document.lines.each_with_index.filter_map do |line, idx| + stripped = line.strip + next if stripped.empty? || stripped.start_with?("#") + + if (match = stripped.match(/\A([A-Z]\w*)\s*:\s*TypeAlias\s*=\s*(.+?)\s*(?:#.*)?\z/)) + alias_definition("python", "python-typing", "", match[1], match[2].strip, idx + 1) + elsif (match = stripped.match(/\Atype\s+([A-Z]\w*)\s*=\s*(.+?)\s*(?:#.*)?\z/)) + alias_definition("python", "python-typing", "", match[1], match[2].strip, idx + 1) + end + end + end + + def typescript_interface_type_definitions + return [] unless @language == "typescript" + + definitions = [] + owner = nil + @document.lines.each_with_index do |line, idx| + line_no = idx + 1 + stripped = line.strip + if (match = stripped.match(/\A(?:export\s+)?interface\s+([A-Za-z_$]\w*)\b/)) + owner = match[1] + next + end + + if owner && stripped.start_with?("}") + owner = nil + next + end + next unless owner + + if (match = stripped.match(/\A([A-Za-z_$]\w*)\??\s*\((.*)\)\s*:\s*([^;{]+)/)) + params = NilKill.split_top_level(match[2]).filter_map do |entry| + name, type = typescript_param_entry(entry) + next unless name && type + + { "name" => name, "type" => type } + end + definitions << { + "id" => ["typescript", rel(@document.file), owner, "method_signature", match[1], line_no, "typescript-interface"].map(&:to_s).join("\u0000"), + "language" => "typescript", + "type_system" => "typescript", + "kind" => "method_signature", + "path" => rel(@document.file), + "owner" => owner, + "name" => match[1], + "line" => 
line_no, + "signature" => stripped.delete_suffix(";"), + "return_type" => match[3].strip, + "params" => params, + } + elsif (match = stripped.match(/\A([A-Za-z_$]\w*)\??\s*:\s*([^;{]+)/)) + definitions << { + "id" => ["typescript", rel(@document.file), owner, "state_field", match[1], line_no, "typescript-interface"].map(&:to_s).join("\u0000"), + "language" => "typescript", + "type_system" => "typescript", + "kind" => "state_field", + "path" => rel(@document.file), + "owner" => owner, + "name" => match[1], + "line" => line_no, + "declared_type" => match[2].strip, + } + end + end + definitions + end + + def typescript_type_alias_definitions + return [] unless @language == "typescript" + + @document.lines.each_with_index.filter_map do |line, idx| + match = line.strip.match(/\A(?:export\s+)?type\s+([A-Za-z_$]\w*)\s*=\s*(.+?)\s*;?\s*\z/) + next unless match + + alias_definition("typescript", "typescript", "", match[1], match[2].strip, idx + 1) + end + end + + def type_alias_definitions(language, type_system, pattern) + owner_stack = [] + pending = nil + definitions = [] + @document.lines.each_with_index do |line, idx| + line_no = idx + 1 + stripped = line.strip + next if stripped.empty? || stripped.start_with?("#") + + if pending + if stripped == "end" && line_indent(line) <= pending[:indent] + target = pending[:body].join(" ").gsub(/\s+/, " ").strip.sub(/,\z/, "") + definitions << alias_definition(language, type_system, pending[:owner], pending[:name], target, pending[:line]) unless target.empty? 
+ pending = nil + else + pending[:body] << stripped + end + next + end + + if (match = stripped.match(/\A(?:class|module)\s+([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) + owner_stack << qualified_owner(owner_stack.last, match[1]) + next + end + + if stripped == "end" + owner_stack.pop + next + end + + if (match = stripped.match(pattern)) + definitions << alias_definition(language, type_system, owner_stack.last.to_s, match[1], match[2].strip, line_no) + elsif (match = stripped.match(/\A([A-Z]\w*)\s*=\s*T\.type_alias\s+do\b/)) + pending = { owner: owner_stack.last.to_s, name: match[1], line: line_no, indent: line_indent(line), body: [] } + end + end + definitions + end + + def alias_definition(language, type_system, owner, name, target, line) + { + "id" => [language, rel(@document.file), owner, "type_alias", name, line, type_system].map(&:to_s).join("\u0000"), + "language" => language, + "type_system" => type_system, + "kind" => "type_alias", + "path" => rel(@document.file), + "owner" => owner.to_s, + "name" => name.to_s, + "line" => line, + "target" => target.to_s, + } + end + + def hash_shape(node) + return nil unless hash_literal_node?(node) + + pairs = hash_pair_nodes(node) + return nil if pairs.empty? + + keys = [] + value_types = [] + constants = constant_literal_types + pairs.each do |pair| + key = hash_key_name(hash_pair_key(pair)) + next unless key + + keys << key + value_types << literal_value_type(hash_pair_value(pair), constants) + end + return nil if keys.empty? 
+ + { + "path" => rel(@document.file), + "line" => node_line(node), + "span" => node_span(node), + "keys" => keys, + "value_types" => value_types, + "code" => node_text(node), + "source" => "syntax", + } + end + + def array_shape(node) + return nil unless array_literal_node?(node) + + elements = array_elements(node) + constants = constant_literal_types + types = elements.map { |child| literal_value_type(child, constants) } + { + "path" => rel(@document.file), + "line" => node_line(node), + "span" => node_span(node), + "element_types" => types.uniq, + "tuple_types" => types, + "size" => elements.size, + "homogeneous" => types.uniq.size <= 1, + "code" => node_text(node), + "source" => "syntax", + } + end + + def python_signature_types(signature) + source = signature.to_s.strip + match = source.match(/\A(?:async\s+)?def\s+\w+\s*\((.*)\)\s*(?:->\s*([^:]+))?:/) + return { params: [], return_type: nil } unless match + + params = NilKill.split_top_level(match[1]).filter_map do |entry| + entry = entry.sub(/\A\*\*?/, "").strip + name, rest = entry.split(/:\s*/, 2) + next unless name && rest + + name = name.sub(/=.*/, "").strip + next if self_receiver_names.include?(name) + + type = rest.sub(/=.*/, "").strip + next if type.empty? + + { "name" => name, "type" => type } + end + { params: params, return_type: match[2]&.strip } + end + + def typescript_signature_types(signature) + source = signature.to_s.strip + params_source, close_idx = extract_parenthesized(source) + return { params: [], return_type: nil } unless params_source + + params = NilKill.split_top_level(params_source).filter_map do |entry| + name, type = typescript_param_entry(entry) + next unless name && type + + { "name" => name, "type" => type } + end + tail = source[(close_idx + 1)..].to_s + { params: params, return_type: tail[/\A\s*:\s*([^={;]+)/, 1]&.strip } + end + + def typescript_param_entry(entry) + text = entry.to_s.strip + return [nil, nil] if text.empty? 
+ + text = text.sub(/\A(?:(?:public|private|protected|override|declare)\s+)+/, "") + text = text.sub(/\Areadonly\s+/, "") # readonly follows any accessibility/override modifiers + text = text.sub(/\A\.\.\./, "") + name, type = text.split(/:\s*/, 2) + return [nil, nil] unless name && type + + name = name.sub(/=.*/, "").sub(/\?\z/, "").strip + type = type.sub(/=(?!>).*/, "").strip # drop a default value but keep "=>" in arrow-function types + return [nil, nil] if name.empty? || type.empty? + + [name, type] + end + + def extract_parenthesized(source) + start = source.index("(") + return [nil, nil] unless start + + depth = 0 + i = start + while i < source.length + case source[i] + when "(" + depth += 1 + when ")" + depth -= 1 + return [source[(start + 1)...i], i] if depth.zero? + end + i += 1 + end + [nil, nil] + end + + def canonical_state_field(field, receiver: nil) + text = field.to_s + return text if text.empty? || text.start_with?("@") + return "@#{text}" if %w[python typescript javascript].include?(@language) && owned_receiver_name?(receiver) + + text + end + + def declared_state_field(field) + text = field.to_s + return text if text.empty? || text.start_with?("@") + return "@#{text}" if %w[python typescript javascript].include?(@language) + + text + end + + def receiver_state_field(receiver, known_states) + known = Set.new(Array(known_states).map { |field| canonical_state_field(field) }) + text = receiver.to_s.sub(/\A\*/, "") + return nil if text.empty? || self_receiver_names.include?(text) + return canonical_state_field(text.split(".").first, receiver: text) if text.start_with?("@") + + self_receiver_names.each do |name| + prefix = "#{name}." + return canonical_state_field(text.split(".")[1], receiver: text) if text.start_with?(prefix) + end + + first = canonical_state_field(text.split(".").first, receiver: text) + known.include?(first) ? 
first : nil + end + + def owned_state?(record, known_states) + known = Set.new(Array(known_states).map { |field| canonical_state_field(field) }) + field = canonical_state_field(record.field, receiver: record.receiver) + return true if known.include?(field) + + receiver = record.receiver.to_s + return false if receiver == ".literal" + + owned_receiver_name?(receiver) + end + + def owned_receiver_name?(receiver) + text = receiver.to_s.sub(/\A\*/, "") + return true if text.start_with?("@") + + self_receiver_names.any? { |name| text == name || text.start_with?("#{name}.") } + end + + def self_receiver_names + case @language + when "python" then %w[self cls] + when "typescript", "javascript" then %w[this] + else %w[self this] + end + end + + def state_key(owner, field) + [owner.to_s, field.to_s].join("\u0000") + end + + def stringify_set_map(map) + Hash[map.sort.map { |key, values| [key, values.to_a.map(&:to_s).sort.uniq] }] + end + + def state_target(node) + return nil unless ts_node?(node) + + case node.kind.to_s + when "call" + receiver = named_child(node, "receiver") + method = named_child(node, "method") + return nil unless receiver && method + + { receiver: node_text(receiver), field: node_text(method).sub(/=\z/, "") } + when "field", "selector_expression", "member_expression", "attribute", "field_expression", "expression_list" + object = named_child(node, "object") || named_child(node, "receiver") || + named_child(node, "operand") || named_child(node, "value") + field = named_child(node, "field") || named_child(node, "property") || node.named_children.last + return nil unless object && field + + { receiver: node_text(object), field: node_text(field).sub(/=\z/, "") } + when "instance_variable" + { receiver: "self", field: node_text(node) } + end + end + + def declared_type_text(node, name_node) + text = node_text(node) + after_name = text[(name_node.end_byte - node.start_byte)..].to_s + return normalize_text(Regexp.last_match(1)) if 
after_name.match(/\A\s*:\s*([^=,\n]+)/) + return normalize_text(Regexp.last_match(1)) if text.match(/\A\s*(?:pub\s+)?(?:const|var)\s+\w+\s*:\s*([^=;\n]+)/) + + nil + rescue StandardError + nil + end + + def hash_literal_node?(node) + %w[hash dictionary object map literal_value].include?(node.kind.to_s) || + (node_text(node).start_with?("{") && node_text(node).end_with?("}") && hash_pair_nodes(node).any?) + end + + def hash_pair_nodes(node) + Array(node.named_children).select do |child| + %w[pair hash_pair pair_pattern keyed_element field_initializer].include?(child.kind.to_s) || + named_child(child, "key") + end + end + + def hash_pair_key(pair) + named_child(pair, "key") || pair.named_children.first + end + + def hash_pair_value(pair) + named_child(pair, "value") || named_child(pair, "field") || pair.named_children[1] + end + + def hash_key_name(node) + text = node_text(node) + return nil if text.empty? + return Regexp.last_match(1) if text.match(/\A:([A-Za-z_]\w*[!?=]?)\z/) # match, not match?: match? never populates Regexp.last_match + return Regexp.last_match(1) if text.match(/\A([A-Za-z_]\w*)\s*:\z/) + return Regexp.last_match(1) if text.match(/\A["']([^"']+)["']\z/) + return text if text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) + + nil + end + + def array_literal_node?(node) + %w[array list array_literal list_literal].include?(node.kind.to_s) + end + + def array_elements(node) + Array(node.named_children).reject { |child| %w[comment].include?(child.kind.to_s) } + end + + def method_owner(fn) + ruby_struct_owner_for_line(fn.line) || fn.owner.to_s + end + + def ruby_struct_definitions + @ruby_struct_definitions ||= begin + definitions = [] + walk_tree(@document.root) do |node| + match = node_text(node).match(/\A([A-Z]\w*)\s*=\s*Struct\.new\((.*?)\)/m) + next unless match + + parent = owner_for_line(node_line(node), include_struct: false) + owner = qualified_owner(parent, match[1]) + fields = NilKill.split_top_level(match[2]).filter_map do |arg| + arg.strip[/\A:([A-Za-z_]\w*)\z/, 1] + end + next if fields.empty? 
+ + definitions << { + owner: owner, + line: node_line(node), + span: node_span(node), + fields: fields, + } + end + definitions.uniq { |entry| [entry.fetch(:owner), entry.fetch(:line), entry.fetch(:fields)] } + end + end + + def ruby_struct_owner_for_line(line) + deepest_owner_for_line(ruby_struct_definitions, line) + end + + def ruby_t_struct_state_declarations + ruby_t_struct_fields.map do |field| + Decomplex::Syntax::StateDeclaration.new( + field: field.fetch(:name), + owner: field.fetch(:owner), + type: field.fetch(:type), + file: @document.file, + line: field.fetch(:line), + span: field.fetch(:span) + ) + end + end + + def ruby_t_struct_fields + return [] unless @language == "ruby" + + @ruby_t_struct_fields ||= begin + fields = [] + walk_tree(@document.root) do |node| + next unless node.kind.to_s == "call" + + match = node_text(node).match(/\A(?:const|prop)\s+:([A-Za-z_]\w*)\s*,\s*(.+?)\s*(?:do\b.*)?\z/m) + next unless match + + owner = ruby_t_struct_owner_for_line(node_line(node)) + next if owner.to_s.empty? 
+ + fields << { + owner: owner, + name: match[1], + type: normalize_text(match[2]), + line: node_line(node), + span: node_span(node), + } + end + fields.uniq { |field| [field.fetch(:owner), field.fetch(:name), field.fetch(:line)] } + end + end + + def ruby_t_struct_containers + return [] unless @language == "ruby" + + @ruby_t_struct_containers ||= begin + containers = [] + walk_tree(@document.root) do |node| + match = node_text(node).match(/\Aclass\s+([A-Z]\w*(?:::[A-Z]\w*)*)\s*<\s*T::Struct\b/m) + next unless match + + owner = declaration_owner_for_line(match[1], node_line(node)) + containers << { + owner: owner, + line: node_line(node), + span: node_span(node), + } + end + containers.uniq { |entry| [entry.fetch(:owner), entry.fetch(:line)] } + end + end + + def ruby_t_struct_owner_for_line(line) + deepest_owner_for_line(ruby_t_struct_containers, line) + end + + def deepest_owner_for_line(records, line) + Array(records).select { |record| span_contains_line?(record.fetch(:span), line) } + .max_by { |record| span_sort_key(record.fetch(:span)) } + &.fetch(:owner, nil) + end + + def span_contains_line?(span, line) + range = Array(span) + range[0].to_i <= line.to_i && range[2].to_i >= line.to_i + end + + def span_sort_key(span) + range = Array(span) + [range[0].to_i, -((range[2].to_i - range[0].to_i).abs)] + end + + def declaration_owner_for_line(name, line) + owner = owner_for_line(line, include_struct: false).to_s + return owner if owner == name.to_s || owner.end_with?("::#{name}") + + qualified_owner(owner, name) + end + + def resolved_include_name(owner, name) + return name.to_s if name.to_s.include?("::") + + namespace = owner.to_s.split("::")[0...-1].join("::") + qualified = qualified_owner(namespace, name) + owner_names.include?(qualified) ? 
qualified : name.to_s + end + + def owner_names + @owner_names ||= Set.new(Array(@facts[:owner_defs]).map { |owner| owner.name.to_s }) + end + + def constant_literal_types + @constant_literal_types ||= begin + types = {} + walk_tree(@document.root) do |node| + name, value = constant_assignment(node) + next if name.to_s.empty? + + type = if value + literal_value_type(value, types) + else + literal_text_type(node_text(node).split("=", 2)[1].to_s, types) + end + types[name] = type unless type == "T.untyped" + end + types + end + end + + def constant_assignment(node) + if %w[assignment assignment_expression assignment_statement].include?(node.kind.to_s) + target = named_child(node, "left") || node.named_children.first + return [nil, nil] unless target && target.kind.to_s == "constant" + + return [node_text(target), named_child(node, "right") || node.named_children[1]] + end + + match = node_text(node).match(/\A([A-Z]\w*)\s*=\s*(.+)\z/m) + return [nil, nil] unless match + + value = node.named_children.drop(1).find { |child| node_text(child) == match[2].strip } || + node.named_children[1] + [match[1], value] + end + + def literal_value_type(node, constant_types = constant_literal_types) + return "T.untyped" unless node + + kind = node.kind.to_s + text = node_text(node) + case kind + when "string", "string_literal", "interpreted_string_literal", "raw_string_literal" then "String" + when "integer", "integer_literal" then "Integer" + when "float", "float_literal" then "Float" + when "true", "false", "true_literal", "false_literal", "boolean" then "T::Boolean" + when "nil", "none", "null", "nil_literal", "none_literal", "null_literal" then "NilClass" + when "symbol", "simple_symbol", "hash_key_symbol" then "Symbol" + when "symbol_array" then "T::Array[Symbol]" + when "string_array" then "T::Array[String]" + when "constant" then constant_types[text] || "T.untyped" + else + text_type = literal_text_type(text, constant_types) + return text_type unless text_type == "T.untyped" + 
+ return array_literal_type(node, constant_types) if array_literal_node?(node) + return "T::Hash[T.untyped, T.untyped]" if hash_literal_node?(node) + + "T.untyped" + end + end + + def literal_text_type(text, constant_types = {}) + value = text.to_s.strip + return "String" if value.match?(/\A["']/) + return "Symbol" if value.match?(/\A:/) + return "T::Array[Symbol]" if value.match?(/\A%i[\[\(\{]/) + return "T::Array[String]" if value.match?(/\A%w[\[\(\{]/) + return "Integer" if value.match?(/\A[-+]?\d+\z/) + return "Float" if value.match?(/\A[-+]?\d+\.\d+\z/) + return "T::Boolean" if %w[true false True False].include?(value) + return "NilClass" if %w[nil null None].include?(value) + return constant_types[value] if constant_types.key?(value) + + "T.untyped" + end + + def array_literal_type(node, constant_types) + types = array_elements(node).map { |child| literal_value_type(child, constant_types) } + return "T::Array[T.untyped]" if types.empty? || types.include?("T.untyped") + + unique = types.uniq + unique.size == 1 ? "T::Array[#{unique.first}]" : "T::Array[T.any(#{unique.join(", ")})]" + end + + def owner_for_line(line, include_struct: false) + if include_struct + owner = ruby_struct_owner_for_line(line) + return owner if owner + end + + Array(@facts[:owner_defs]).select do |owner| + span = Array(owner.span) + span[0].to_i <= line.to_i && span[2].to_i >= line.to_i + end.max_by { |owner| Array(owner.span)[0].to_i }&.name || file_owner + end + + def qualified_owner(parent, name) + return name.to_s if name.to_s.include?("::") + + parent.to_s.empty? ? 
name.to_s : "#{parent}::#{name}" + end + + def file_owner + File.basename(@document.file.to_s, File.extname(@document.file.to_s)) + end + + def rel(path) + Pathname.new(path).relative_path_from(Pathname.new(@root)).to_s + rescue StandardError + path.to_s + end + + def walk_tree(node, &block) + return unless ts_node?(node) + + yield node + node.children.each { |child| walk_tree(child, &block) } + end + + def ts_node?(node) + node && node.respond_to?(:kind) && node.respond_to?(:children) + end + + def named_child(node, name) + node.child_by_field_name(name) + rescue StandardError + nil + end + + def node_line(node) + node.start_point.row + 1 + rescue StandardError + 1 + end + + def node_span(node) + [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] + rescue StandardError + nil + end + + def node_text(node) + node&.text.to_s.strip + end + + def normalize_text(text) + text.to_s.strip.gsub(/\s+/, " ") + end + + def line_indent(line) + line[/\A\s*/].to_s.length + end + end + end + + module Ast + class TreeSitterNormalizer + def nil_kill_static_facts(structural_facts, root: NilKill::ROOT) + Decomplex::NilKillStaticFacts.build(@document, structural_facts, root: root) + end + end + end + + module Syntax + class Document + def static_facts(root: NilKill::ROOT) + @static_facts ||= {} + @static_facts[root] ||= adapter.static_facts(self, root: root) + end + end + + class TreeSitterAdapter + def static_facts(document, root: NilKill::ROOT) + Decomplex::Ast::TreeSitterNormalizer.new(document).nil_kill_static_facts(structural_facts(document), root: root) + end + end + end +end diff --git a/gems/nil-kill/lib/nil_kill/languages/provider.rb b/gems/nil-kill/lib/nil_kill/languages/provider.rb index a4769d65a..1862f6fb3 100644 --- a/gems/nil-kill/lib/nil_kill/languages/provider.rb +++ b/gems/nil-kill/lib/nil_kill/languages/provider.rb @@ -1,8 +1,6 @@ # typed: false # frozen_string_literal: true -require "set" - module NilKill module 
Languages class UnsupportedRuntimeTracer < StandardError; end @@ -32,10 +30,16 @@ def static_parser "tree_sitter" end + # Real checker/indexer backends only. Syntax annotations are reported + # separately so capability metadata does not overclaim semantic typing. def type_systems [] end + def annotation_systems + [] + end + def type_indexing? !type_systems.empty? end @@ -74,6 +78,7 @@ def capability "extensions" => extensions.map(&:to_s).sort, "static_analysis" => static_analysis?, "static_parser" => static_parser, + "annotation_systems" => annotation_systems.map(&:to_s).sort, "type_indexing" => type_indexing?, "type_systems" => type_systems.map(&:to_s).sort, "runtime_tracing" => runtime_tracing?, @@ -95,409 +100,9 @@ def field_type_index(root:) {} end - def external_type_definitions(root:) - [] - end - def static_diff_findings(root:, added_lines:, context_paths:, finding_class:) [] end - - def static_evidence(document:, facts:, rel_path:) - state_declarations = Array(facts[:state_declarations]) + - extra_state_declarations(document: document, facts: facts, rel_path: rel_path) - state_param_origins = Array(facts[:state_param_origins]) + - extra_state_param_origins(document: document, facts: facts, rel_path: rel_path) - known_states = declared_states_by_owner(state_declarations) - - methods = [] - signatures = {} - Array(facts[:function_defs]).each do |fn| - record = method_record(document, rel_path, fn) - methods << record - signature = static_method_signature(fn) - signatures[[fn.owner.to_s, fn.name.to_s].join("\u0000")] = signature unless signature.empty? - end - - fields = [] - state_types = {} - seen_fields = Set.new - state_declarations.each do |state| - field = declared_state_field(state.field) - fields << field_record(document, rel_path, state, field) - seen_fields.add(state_key(state.owner, field)) - next if state.type.to_s.empty? 
- - state_types[state_key(state.owner, field)] = state.type.to_s - end - Array(facts[:state_writes]).each do |write| - next unless owned_state_write?(write, known_states[write.owner.to_s]) - - field = canonical_state_field(write.field, receiver: write.receiver) - key = state_key(write.owner, field) - next if seen_fields.include?(key) - - fields << field_record(document, rel_path, write, field) - seen_fields.add(key) - end - - state_protocols = Hash.new { |hash, key| hash[key] = Set.new } - state_param_origin_map = Hash.new { |hash, key| hash[key] = Set.new } - - state_param_origins.each do |origin| - next unless owned_state_origin?(origin, known_states[origin.owner.to_s]) - next if self_receiver_names.include?(origin.param.to_s) - - field = canonical_state_field(origin.field, receiver: origin.receiver) - state_param_origin_map[state_key(origin.owner, field)].add(origin.param.to_s) - end - - Array(facts[:call_sites]).each do |call| - state = receiver_state_field(call.receiver, known_states[call.owner.to_s]) - next unless state - - state_protocols[state_key(call.owner, state)].add(call.message.to_s) - end - - { - "methods" => methods, - "fields" => fields, - "state_types" => state_types, - "state_protocols" => stringify_set_map(state_protocols), - "state_param_origins" => stringify_set_map(state_param_origin_map), - "signatures" => signatures, - "hash_shapes" => hash_shapes(document: document, facts: facts, rel_path: rel_path), - "type_definitions" => type_definitions( - document: document, - facts: facts, - rel_path: rel_path, - methods: methods, - state_declarations: state_declarations - ), - } - end - - def canonical_state_field(field, receiver: nil) - field.to_s - end - - def declared_state_field(field) - canonical_state_field(field) - end - - def owned_state_origin?(origin, known_states) - known = normalize_known_states(known_states) - field = canonical_state_field(origin.field, receiver: origin.receiver) - return true if known.include?(field) - - receiver = 
normalize_receiver(origin.receiver) - return false if receiver == ".literal" - - self_receiver?(receiver) || owned_receiver?(receiver) - end - - def owned_state_write?(write, known_states) - known = normalize_known_states(known_states) - field = canonical_state_field(write.field, receiver: write.receiver) - return true if known.include?(field) - - receiver = normalize_receiver(write.receiver) - return false if receiver == ".literal" - - self_receiver?(receiver) || owned_receiver?(receiver) - end - - def receiver_state_field(receiver, known_states) - known = normalize_known_states(known_states) - text = normalize_receiver(receiver).sub(/\A\*/, "") - return nil if text.empty? || self_receiver?(text) - - if instance_field_receiver?(text) - return canonical_state_field(text.split(".").first, receiver: text) - end - - self_receiver_names.each do |name| - prefix = "#{name}." - next unless text.start_with?(prefix) - - return canonical_state_field(text.split(".")[1], receiver: text) - end - - first = canonical_state_field(text.split(".").first, receiver: text) - known.include?(first) ? first : nil - end - - def extra_state_declarations(document:, facts:, rel_path:) - [] - end - - def extra_state_param_origins(document:, facts:, rel_path:) - [] - end - - def type_definitions(document:, facts:, rel_path:, methods:, state_declarations:) - [] - end - - def hash_shapes(document:, facts:, rel_path:) - shapes = [] - constant_types = literal_constant_types(document.root) - walk_tree(document.root) do |node| - next unless hash_literal_node?(node) - - pairs = hash_pair_nodes(node) - next if pairs.empty? - - keys = [] - value_types = [] - pairs.each do |pair| - key = hash_key_name(hash_pair_key(pair)) - next unless key - - keys << key - value_types << literal_value_type(hash_pair_value(pair), constant_types) - end - next if keys.empty? 
- - shapes << { - "path" => rel_path, - "line" => node_line(node), - "keys" => keys, - "value_types" => value_types, - "code" => node_text(node), - } - end - shapes - end - - def static_method_signature(function_def) - function_def.signature.to_s - end - - def method_source(function_def) - signature = static_method_signature(function_def) - return {} if signature.empty? - - source = { "signature" => signature } - systems = type_systems - source["type_system"] = systems.first.to_s unless systems.empty? - source - end - - def method_record(document, rel_path, function_def) - owner = function_def.owner.to_s - name = function_def.name.to_s - { - "key" => [owner, name, function_def.kind.to_s], - "owner" => owner, - "name" => name, - "kind" => function_def.kind.to_s, - "path" => rel_path, - "line" => function_def.line, - "span" => function_def.span, - "language" => document.language.to_s, - "signature" => static_method_signature(function_def), - "params" => Array(function_def.params).map(&:to_s), - "source" => method_source(function_def), - } - end - - private - - def declared_states_by_owner(state_declarations) - index = Hash.new { |hash, key| hash[key] = Set.new } - state_declarations.each { |state| index[state.owner.to_s].add(declared_state_field(state.field)) } - index - end - - def state_key(owner, field) - [owner.to_s, field.to_s].join("\u0000") - end - - def field_record(document, rel_path, state, field) - { - "id" => [document.language, rel_path, state.owner, "field", field].map(&:to_s).join("\u0000"), - "language" => document.language.to_s, - "path" => rel_path, - "owner" => state.owner.to_s, - "name" => field.to_s, - "line" => state.line, - "span" => state.span, - "declared_type" => state.respond_to?(:type) && !state.type.to_s.empty? ? state.type.to_s : nil, - "static_origin" => state.respond_to?(:type) ? 
"state_declaration" : "state_write", - } - end - - def walk_tree(node, &block) - return unless node && node.respond_to?(:children) - - yield node - node.children.each { |child| walk_tree(child, &block) } - end - - def hash_literal_node?(node) - return true if %w[hash dictionary object map literal_value].include?(node.kind.to_s) - - text = node_text(node) - text.start_with?("{") && text.end_with?("}") && hash_pair_nodes(node).any? - end - - def hash_pair_nodes(node) - Array(node.named_children).select do |child| - %w[pair hash_pair pair_pattern keyed_element field_initializer].include?(child.kind.to_s) || - child.child_by_field_name("key") - rescue StandardError - false - end - end - - def hash_pair_key(pair) - named_child(pair, "key") || pair.named_children.first - end - - def hash_pair_value(pair) - named_child(pair, "value") || named_child(pair, "field") || pair.named_children[1] - end - - def hash_key_name(node) - text = node_text(node) - return nil if text.empty? - return Regexp.last_match(1) if text.match?(/\A:([A-Za-z_]\w*[!?=]?)\z/) - return Regexp.last_match(1) if text.match?(/\A([A-Za-z_]\w*)\s*:\z/) - return Regexp.last_match(1) if text.match?(/\A["']([^"']+)["']\z/) - return text if text.match?(/\A[A-Za-z_]\w*[!?=]?\z/) - - nil - end - - def literal_constant_types(root) - types = {} - walk_tree(root) do |node| - next unless node.kind.to_s == "assignment" - - name = constant_assignment_name(node) - next if name.to_s.empty? 
- - type = literal_value_type(constant_assignment_value(node), types) - types[name] = type unless type == "T.untyped" - end - types - end - - def constant_assignment_name(node) - target = named_child(node, "left") || node.named_children.first - return nil unless target && target.kind.to_s == "constant" - - node_text(target) - end - - def constant_assignment_value(node) - named_child(node, "right") || node.named_children[1] - end - - def literal_value_type(node, constant_types = {}) - return "T.untyped" unless node - - kind = node.kind.to_s - text = node_text(node) - case kind - when "string", "string_literal", "interpreted_string_literal", "raw_string_literal" then "String" - when "integer", "integer_literal" then "Integer" - when "float", "float_literal" then "Float" - when "true", "false", "true_literal", "false_literal", "boolean" then "T::Boolean" - when "nil", "none", "null", "nil_literal", "none_literal", "null_literal" then "NilClass" - when "symbol", "simple_symbol", "hash_key_symbol" then "Symbol" - when "symbol_array" then "T::Array[Symbol]" - when "string_array" then "T::Array[String]" - when "constant" then constant_types[text] || "T.untyped" - else - return "String" if text.match?(/\A["']/) - return "Symbol" if text.match?(/\A:/) - return "T::Array[Symbol]" if text.match?(/\A%i[\[\(\{]/) - return "T::Array[String]" if text.match?(/\A%w[\[\(\{]/) - return "Integer" if text.match?(/\A[-+]?\d+\z/) - return "Float" if text.match?(/\A[-+]?\d+\.\d+\z/) - return "T::Boolean" if %w[true false True False].include?(text) - return "NilClass" if %w[nil null None].include?(text) - if t_let_call?(node) - type = t_let_declared_type(node) - return type unless type.empty? 
- end - if array_literal_node?(node) - return array_literal_type(node, constant_types) - end - return "T::Array[T.untyped]" if array_literal_node?(node) - return "T::Hash[T.untyped, T.untyped]" if hash_literal_node?(node) - - "T.untyped" - end - end - - def array_literal_node?(node) - %w[array list array_literal list_literal].include?(node.kind.to_s) - end - - def array_literal_type(node, constant_types) - types = Array(node.named_children).map { |child| literal_value_type(child, constant_types) } - return "T::Array[T.untyped]" if types.empty? || types.include?("T.untyped") - - unique = types.uniq - unique.size == 1 ? "T::Array[#{unique.first}]" : "T::Array[T.any(#{unique.join(", ")})]" - end - - def t_let_call?(node) - node.kind.to_s == "call" && node_text(node).start_with?("T.let") - end - - def t_let_declared_type(node) - args = Array((named_child(node, "arguments") || node.named_children.find { |child| child.kind.to_s == "argument_list" })&.named_children) - args[1] ? node_text(args[1]) : "" - end - - def named_child(node, name) - node.child_by_field_name(name) - rescue StandardError - nil - end - - def node_line(node) - node.start_point.row + 1 - rescue StandardError - 1 - end - - def node_text(node) - node&.text.to_s.strip - end - - def stringify_set_map(map) - Hash[map.sort.map { |key, values| [key, values.to_a.map(&:to_s).sort.uniq] }] - end - - def normalize_known_states(states) - Set.new(Array(states).map { |field| canonical_state_field(field) }) - end - - def normalize_receiver(receiver) - receiver.to_s - end - - def self_receiver_names - %w[self this] - end - - def self_receiver?(receiver) - self_receiver_names.include?(receiver.to_s) - end - - def owned_receiver?(receiver) - receiver = receiver.to_s - instance_field_receiver?(receiver) || self_receiver_names.any? 
{ |name| receiver.start_with?("#{name}.") } - end - - def instance_field_receiver?(receiver) - receiver.to_s.match?(/\A@[A-Za-z_]\w*(?:\.|\z)/) - end end class GenericTreeSitterProvider < Provider diff --git a/gems/nil-kill/lib/nil_kill/languages/providers/python.rb b/gems/nil-kill/lib/nil_kill/languages/providers/python.rb index 7d6102bd7..4d7f06ecd 100644 --- a/gems/nil-kill/lib/nil_kill/languages/providers/python.rb +++ b/gems/nil-kill/lib/nil_kill/languages/providers/python.rb @@ -21,7 +21,7 @@ def extensions %w[.py .pyi] end - def type_systems + def annotation_systems ["python-typing"] end @@ -60,7 +60,7 @@ def runtime_capabilities end def notes - ["source rewriting is owned by auto-type providers; Python analysis remains report-only here"] + ["annotation parsing is Tree-sitter static evidence; no Python type-checker backend is wired yet"] end def collect_runtime(argv:, root:, output:, targets:, append: false) @@ -72,109 +72,11 @@ def collect_runtime(argv:, root:, output:, targets:, append: false) FileUtils.rm_rf(output) unless append FileUtils.mkdir_p(output) - env = runtime_env(root: root, output: output, targets: targets) - ok = system(env, *command, chdir: root) + ok = system(runtime_env(root: root, output: output, targets: targets), *command, chdir: root) exit($?&.exitstatus || 1) unless ok puts "wrote Python trace events to #{output}" end - def canonical_state_field(field, receiver: nil) - text = field.to_s - return text if text.empty? || text.start_with?("@") - - receiver_text = receiver.to_s - if self_receiver_names.include?(receiver_text) || - self_receiver_names.any? { |name| receiver_text.start_with?("#{name}.") } - "@#{text}" - else - text - end - end - - def declared_state_field(field) - text = field.to_s - return text if text.empty? 
|| text.start_with?("@") - - "@#{text}" - end - - def extra_state_declarations(document:, facts:, rel_path:) - python_state_assignments(document, facts).filter_map do |assignment| - next if assignment[:type].to_s.empty? - - Decomplex::Syntax::StateDeclaration.new( - field: assignment[:field], - owner: assignment[:owner], - type: assignment[:type], - file: document.file, - line: assignment[:line], - span: assignment[:span] - ) - end - end - - def extra_state_param_origins(document:, facts:, rel_path:) - python_state_assignments(document, facts).filter_map do |assignment| - param = assignment[:param].to_s - next if param.empty? || self_receiver_names.include?(param) - - Decomplex::Syntax::StateParamOrigin.new( - field: assignment[:field], - receiver: assignment[:receiver], - owner: assignment[:owner], - param: param, - file: document.file, - function: assignment[:function], - line: assignment[:line], - span: assignment[:span] - ) - end - end - - def type_definitions(document:, facts:, rel_path:, methods:, state_declarations:) - definitions = [] - Array(facts[:function_defs]).each do |fn| - typed = python_signature_types(fn.signature) - next if typed[:params].empty? && typed[:return_type].to_s.empty? - - definitions << { - "id" => ["python", rel_path, fn.owner, "method_signature", fn.name, fn.line, "python-typing"].map(&:to_s).join("\u0000"), - "language" => "python", - "type_system" => "python-typing", - "kind" => "method_signature", - "path" => rel_path, - "owner" => fn.owner.to_s, - "name" => fn.name.to_s, - "line" => fn.line, - "signature" => fn.signature.to_s, - "return_type" => typed[:return_type], - "params" => typed[:params], - } - end - - state_declarations.each do |state| - type = state.type.to_s - next if type.empty? 
- - field = declared_state_field(state.field) - definitions << { - "id" => ["python", rel_path, state.owner, "state_field", field, state.line, "python-typing"].map(&:to_s).join("\u0000"), - "language" => "python", - "type_system" => "python-typing", - "kind" => "state_field", - "path" => rel_path, - "owner" => state.owner.to_s, - "name" => field, - "line" => state.line, - "declared_type" => type, - } - end - - definitions.concat(python_stub_type_definitions(document, rel_path)) - definitions.concat(python_type_alias_definitions(document, rel_path)) - definitions - end - private def runtime_env(root:, output:, targets:) @@ -189,151 +91,6 @@ def runtime_env(root:, output:, targets:) "PYTHONPATH" => pythonpath ) end - - def self_receiver_names - %w[self cls] - end - - def python_signature_types(signature) - source = signature.to_s.strip - match = source.match(/\A(?:async\s+)?def\s+\w+\s*\((.*)\)\s*(?:->\s*([^:]+))?:/) - return { params: [], return_type: nil } unless match - - params = NilKill.split_top_level(match[1]).filter_map do |entry| - entry = entry.sub(/\A\*\*?/, "").strip - name, rest = entry.split(/:\s*/, 2) - next unless name && rest - name = name.sub(/=.*/, "").strip - next if self_receiver_names.include?(name) - - type = rest.sub(/=.*/, "").strip - next if type.empty? 
- - { "name" => name, "type" => type } - end - - { params: params, return_type: match[2]&.strip } - end - - def python_state_assignments(document, facts) - Array(facts[:function_defs]).flat_map do |fn| - next [] unless fn.kind.to_s == "method" - - lines_for_function(document, fn).filter_map do |line, line_no| - match = line.match(/\b(self|cls)\.([A-Za-z_]\w*)\s*(?::\s*([^=]+?))?\s*=\s*([A-Za-z_]\w*)\b/) - next unless match - - { - receiver: match[1], - field: match[2], - type: match[3]&.strip, - param: match[4], - owner: fn.owner.to_s, - function: fn.name.to_s, - line: line_no, - span: [line_no, 0, line_no, line.length], - } - end - end - end - - def lines_for_function(document, function_def) - start_line = function_def.line.to_i - end_line = Array(function_def.span)[2].to_i - end_line = start_line if end_line < start_line - document.lines[(start_line - 1)..(end_line - 1)].to_a.each_with_index.map do |line, idx| - [line, start_line + idx] - end - end - - def python_stub_type_definitions(document, rel_path) - return [] unless File.extname(document.file).downcase == ".pyi" - - definitions = [] - owner = nil - owner_indent = nil - document.lines.each_with_index do |line, idx| - line_no = idx + 1 - stripped = line.strip - next if stripped.empty? 
|| stripped.start_with?("#") - - indent = line[/\A\s*/].to_s.length - if owner && indent <= owner_indent.to_i && !stripped.start_with?("def ") - owner = nil - owner_indent = nil - end - - if (match = stripped.match(/\Aclass\s+([A-Za-z_]\w*)\b/)) - owner = match[1] - owner_indent = indent - next - end - - if (match = stripped.match(/\A(?:async\s+)?def\s+([A-Za-z_]\w*)\s*\((.*)\)\s*(?:->\s*([^:]+))?:/)) - name = match[1] - signature = stripped.sub(/\s*\.\.\.\s*\z/, "") - typed = python_signature_types(signature) - definitions << { - "id" => ["python", rel_path, owner, "method_signature", name, line_no, "python-typing-stub"].map(&:to_s).join("\u0000"), - "language" => "python", - "type_system" => "python-typing", - "kind" => "method_signature", - "path" => rel_path, - "owner" => owner.to_s, - "name" => name, - "line" => line_no, - "signature" => signature, - "return_type" => typed[:return_type], - "params" => typed[:params], - } - elsif owner && (match = stripped.match(/\A([A-Za-z_]\w*)\s*:\s*([^=#]+)(?:\s*=.*)?\z/)) - name = match[1] - definitions << { - "id" => ["python", rel_path, owner, "state_field", name, line_no, "python-typing-stub"].map(&:to_s).join("\u0000"), - "language" => "python", - "type_system" => "python-typing", - "kind" => "state_field", - "path" => rel_path, - "owner" => owner.to_s, - "name" => name, - "line" => line_no, - "declared_type" => match[2].strip, - } - end - end - definitions - end - - def python_type_alias_definitions(document, rel_path) - document.lines.each_with_index.filter_map do |line, idx| - stripped = line.strip - next if stripped.empty? 
|| stripped.start_with?("#") - - name = nil - target = nil - if (match = stripped.match(/\A([A-Z]\w*)\s*:\s*TypeAlias\s*=\s*(.+?)\s*(?:#.*)?\z/)) - name = match[1] - target = match[2].strip - elsif (match = stripped.match(/\Atype\s+([A-Z]\w*)\s*=\s*(.+?)\s*(?:#.*)?\z/)) - name = match[1] - target = match[2].strip - end - next unless name && target - - { - "id" => ["python", rel_path, "", "type_alias", name, idx + 1, "python-typing"].map(&:to_s).join("\u0000"), - "language" => "python", - "type_system" => "python-typing", - "kind" => "type_alias", - "path" => rel_path, - "owner" => "", - "name" => name, - "line" => idx + 1, - "target" => target, - "source" => "TypeAlias", - } - end - end end end end diff --git a/gems/nil-kill/lib/nil_kill/languages/providers/ruby.rb b/gems/nil-kill/lib/nil_kill/languages/providers/ruby.rb index 8d5471c35..50cdd9e0d 100644 --- a/gems/nil-kill/lib/nil_kill/languages/providers/ruby.rb +++ b/gems/nil-kill/lib/nil_kill/languages/providers/ruby.rb @@ -7,8 +7,6 @@ module NilKill module Languages module Providers class Ruby < Provider - SourceIndexFunctionDef = Struct.new(:owner, :name, :line, :signature, keyword_init: true) unless const_defined?(:SourceIndexFunctionDef, false) - def language "ruby" end @@ -17,8 +15,8 @@ def extensions [".rb"] end - def static_parser - "tree_sitter" + def annotation_systems + %w[sorbet] end def type_systems @@ -61,45 +59,6 @@ def notes ["runtime collection uses the existing nil-kill collect command and Ruby source instrumentation"] end - def static_evidence(document:, facts:, rel_path:) - source_index = source_index_for(document) - old_source_index = @current_source_index - @current_source_index = source_index - evidence = super - if source_index - evidence["methods"] = source_index_method_records(document, rel_path, source_index) - evidence["signatures"] = source_index_signatures(source_index) - end - evidence - ensure - @current_source_index = old_source_index - end - - def method_source(function_def) - 
sorbet.method_source(function_def) - end - - def static_method_signature(function_def) - sorbet.signature_for(function_def) - end - - def type_definitions(document:, facts:, rel_path:, methods:, state_declarations:) - source_index = @current_source_index || source_index_for(document) - definitions = sorbet.type_definitions( - rel_path: rel_path, - function_defs: source_index ? source_index_function_defs(source_index) : Array(facts[:function_defs]), - state_declarations: state_declarations, - provider: self - ) - if source_index - definitions.concat(source_index_type_definitions(source_index, rel_path)) - else - definitions.concat(ruby_struct_type_definitions(document, rel_path)) - end - definitions.concat(ruby_type_alias_definitions(document, rel_path)) - definitions - end - def return_type_index(root:) sorbet.return_type_index(root: root) end @@ -108,10 +67,6 @@ def field_type_index(root:) sorbet.field_type_index(root: root) end - def external_type_definitions(root:) - sorbet.external_type_definitions(root: root) - end - def static_diff_findings(root:, added_lines:, context_paths:, finding_class:) NilKill::RubyStaticDiffAudit.new( root: root, @@ -124,302 +79,6 @@ def static_diff_findings(root:, added_lines:, context_paths:, finding_class:) def sorbet @sorbet ||= Sorbet.new end - - private - - def source_index_for(document) - return nil unless document.respond_to?(:file) - - file = document.file.to_s - return nil if file.empty? 
|| !File.file?(file) - - NilKill::SourceIndex.new(file) - end - - def source_index_function_defs(source_index) - source_index.methods.map do |method| - SourceIndexFunctionDef.new( - owner: method["class"].to_s, - name: method["method"].to_s, - line: method["line"].to_i, - signature: method["sig"].to_s - ) - end - end - - def source_index_method_records(document, rel_path, source_index) - source_index.methods.map do |method| - owner = method["class"].to_s - name = method["method"].to_s - kind = method["kind"].to_s - signature = method["sig"].to_s - source = if signature.empty? - {} - else - { "sig" => signature, "signature" => signature, "type_system" => "sorbet" } - end - { - "key" => [owner, name, kind], - "owner" => owner, - "name" => name, - "kind" => kind, - "path" => rel_path, - "line" => method["line"], - "span" => nil, - "language" => document.language.to_s, - "signature" => signature, - "params" => Array(method["params"]).map { |param| param["name"].to_s }, - "source" => source, - } - end - end - - def source_index_signatures(source_index) - source_index.methods.each_with_object({}) do |method, signatures| - signature = method["sig"].to_s - next if signature.empty? 
- - signatures[[method["class"].to_s, method["method"].to_s].join("\u0000")] = signature - end - end - - def source_index_type_definitions(source_index, rel_path) - definitions = [] - source_index.struct_declarations.each do |declaration| - Array(declaration["fields"]).each do |field| - definitions << ruby_state_field_definition( - rel_path: rel_path, - owner: declaration["class"], - name: field, - type: nil, - line: declaration["line"], - source: "ruby-struct" - ) - end - end - source_index.sorbet_state_fields.each do |field| - definitions << ruby_state_field_definition( - rel_path: rel_path, - owner: field["class"], - name: field["field"], - type: field["type"], - line: field["line"], - source: "sorbet" - ) - end - source_index.included_modules.each do |mod| - definitions << ruby_included_module_definition( - rel_path: rel_path, - owner: mod["class"], - name: mod["module"], - line: mod["line"] - ) - end - definitions - end - - def ruby_struct_type_definitions(document, rel_path) - definitions = [] - owner_stack = [] - document.lines.each_with_index do |line, index| - line_no = index + 1 - stripped = line.strip - indent = line_indent(line) - if (match = stripped.match(/\Amodule\s+([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - owner_stack << { name: qualified_owner_name(owner_stack, match[1]), t_struct: false, indent: indent } - next - elsif (match = stripped.match(/\Aclass\s+([A-Z]\w*(?:::[A-Z]\w*)*)\s*<\s*T::Struct\b/)) - owner_stack << { name: qualified_owner_name(owner_stack, match[1]), t_struct: true, indent: indent } - next - elsif (match = stripped.match(/\Aclass\s+([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - owner_stack << { name: qualified_owner_name(owner_stack, match[1]), t_struct: false, indent: indent } - next - elsif stripped == "end" - owner_stack.pop while owner_stack.last && indent <= owner_stack.last.fetch(:indent) - next - end - - owner = owner_stack.last - if owner&.fetch(:t_struct) && (field = ruby_t_struct_field(stripped)) - definitions << ruby_state_field_definition( 
- rel_path: rel_path, - owner: owner.fetch(:name), - name: field[:name], - type: field[:type], - line: line_no, - source: "sorbet" - ) - elsif (match = stripped.match(/\A([A-Z]\w*)\s*=\s*Struct\.new\((.*)\)/)) - owner_name = qualified_name(owner_stack, match[1]) - definitions.concat(ruby_struct_new_fields(rel_path, owner_name, match[2], line_no)) - owner_stack << { name: owner_name, t_struct: false, indent: indent } if stripped.match?(/\bdo\b/) - next - elsif owner && (match = stripped.match(/\Ainclude\s+([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - definitions << ruby_included_module_definition( - rel_path: rel_path, - owner: owner.fetch(:name), - name: qualified_include_name(owner_stack, match[1]), - line: line_no - ) - end - end - definitions - end - - def ruby_type_alias_definitions(document, rel_path) - definitions = [] - owner_stack = [] - pending = nil - document.lines.each_with_index do |line, index| - line_no = index + 1 - stripped = line.strip - next if stripped.empty? || stripped.start_with?("#") - - if pending - if stripped == "end" && line_indent(line) <= pending[:indent] - target = normalize_alias_body(pending[:body].join(" ")) - definitions << ruby_type_alias_definition( - rel_path: rel_path, - owner: pending[:owner], - name: pending[:name], - target: target, - line: pending[:line] - ) unless target.empty? 
- pending = nil - else - pending[:body] << stripped - end - next - end - - if (match = stripped.match(/\A(?:class|module)\s+([A-Z]\w*(?:::[A-Z]\w*)*)\b/)) - owner_stack << qualified_owner_name(owner_stack, match[1]) - next - end - - if stripped == "end" - owner_stack.pop - next - end - - if (match = stripped.match(/\A([A-Z]\w*)\s*=\s*T\.type_alias\s*\{\s*(.+)\s*\}\s*(?:#.*)?\z/)) - definitions << ruby_type_alias_definition( - rel_path: rel_path, - owner: owner_stack.last.to_s, - name: match[1], - target: normalize_alias_body(match[2]), - line: line_no - ) - elsif (match = stripped.match(/\A([A-Z]\w*)\s*=\s*T\.type_alias\s+do\b/)) - pending = { - owner: owner_stack.last.to_s, - name: match[1], - line: line_no, - indent: line_indent(line), - body: [], - } - end - end - definitions - end - - def ruby_type_alias_definition(rel_path:, owner:, name:, target:, line:) - { - "id" => ["ruby", rel_path, owner, "type_alias", name, line, "sorbet"].map(&:to_s).join("\u0000"), - "language" => "ruby", - "type_system" => "sorbet", - "kind" => "type_alias", - "path" => rel_path, - "owner" => owner.to_s, - "name" => name.to_s, - "line" => line, - "target" => target.to_s, - "source" => "T.type_alias", - } - end - - def normalize_alias_body(body) - body.to_s.gsub(/\s+/, " ").strip.sub(/,\z/, "") - end - - def line_indent(line) - line[/\A\s*/].to_s.length - end - - def qualified_owner_name(stack, name) - name.to_s.include?("::") ? 
name.to_s : qualified_name(stack, name) - end - - def ruby_t_struct_field(stripped) - match = stripped.match(/\A(?:const|prop)\s+:([A-Za-z_]\w*)\s*,\s*(.+?)\s*(?:do\b.*)?\z/) - return nil unless match - - { name: match[1], type: match[2].strip } - end - - def ruby_struct_new_fields(rel_path, owner, args, line) - NilKill.split_top_level(args).filter_map do |arg| - name = arg.strip[/\A:([A-Za-z_]\w*)\z/, 1] - next unless name - - ruby_state_field_definition( - rel_path: rel_path, - owner: owner, - name: name, - type: nil, - line: line, - source: "ruby-struct" - ) - end - end - - def ruby_state_field_definition(rel_path:, owner:, name:, type:, line:, source:) - { - "id" => ["ruby", rel_path, owner, "state_field", name, line, source].map(&:to_s).join("\u0000"), - "language" => "ruby", - "type_system" => source, - "kind" => "state_field", - "path" => rel_path, - "owner" => owner.to_s, - "name" => name.to_s, - "line" => line, - "declared_type" => type, - } - end - - def ruby_included_module_definition(rel_path:, owner:, name:, line:) - { - "id" => ["ruby", rel_path, owner, "included_module", name, line, "ruby-include"].map(&:to_s).join("\u0000"), - "language" => "ruby", - "type_system" => "ruby-include", - "kind" => "included_module", - "path" => rel_path, - "owner" => owner.to_s, - "name" => name.to_s, - "line" => line, - } - end - - def qualified_name(stack, name) - return name.to_s if name.to_s.include?("::") - - parent = Array(stack).reverse.find do |entry| - value = entry.is_a?(Hash) ? entry[:name] : entry.to_s - !value.to_s.empty? - end - parent_name = parent.is_a?(Hash) ? parent[:name].to_s : parent.to_s - parent_name.empty? ? 
name.to_s : "#{parent_name}::#{name}" - end - - def qualified_include_name(stack, name) - return name.to_s if name.to_s.include?("::") - - qualified_name(Array(stack)[0...-1], name) - end - - def self_receiver_names - %w[self] - end end end end diff --git a/gems/nil-kill/lib/nil_kill/languages/providers/ruby/sorbet.rb b/gems/nil-kill/lib/nil_kill/languages/providers/ruby/sorbet.rb index c68df3297..80536a72e 100644 --- a/gems/nil-kill/lib/nil_kill/languages/providers/ruby/sorbet.rb +++ b/gems/nil-kill/lib/nil_kill/languages/providers/ruby/sorbet.rb @@ -30,50 +30,6 @@ def signature_for(function_def) signature.start_with?("sig ") ? signature : "" end - def type_definitions(rel_path:, function_defs:, state_declarations:, provider:) - definitions = [] - function_defs.each do |fn| - signature = signature_for(fn) - next if signature.empty? - - definitions << { - "id" => ["ruby", rel_path, fn.owner, "method_signature", fn.name, fn.line, "sorbet"].map(&:to_s).join("\u0000"), - "language" => "ruby", - "type_system" => "sorbet", - "kind" => "method_signature", - "path" => rel_path, - "owner" => fn.owner.to_s, - "name" => fn.name.to_s, - "line" => fn.line, - "signature" => signature, - "return_type" => NilKill.extract_return_type(signature), - "params" => NilKill.extract_param_entries(signature).map do |name, type| - { "name" => name, "type" => type } - end, - } - end - - state_declarations.each do |state| - type = state.type.to_s - next if type.empty? 
- - field = provider.declared_state_field(state.field) - definitions << { - "id" => ["ruby", rel_path, state.owner, "state_field", field, state.line, "sorbet"].map(&:to_s).join("\u0000"), - "language" => "ruby", - "type_system" => "sorbet", - "kind" => "state_field", - "path" => rel_path, - "owner" => state.owner.to_s, - "name" => field.to_s, - "line" => state.line, - "declared_type" => type, - } - end - - definitions - end - def return_type_index(root:) NilKill::RbiReturnIndex.build end @@ -86,21 +42,6 @@ def field_type_index(root:) types end - def external_type_definitions(root:) - field_type_index(root: root).map do |(klass, field), type| - { - "id" => ["ruby", "rbi", klass, "state_field", field, "sorbet"].map(&:to_s).join("\u0000"), - "language" => "ruby", - "type_system" => "rbi", - "kind" => "state_field", - "path" => File.join("sorbet", "rbi"), - "owner" => klass, - "name" => field, - "declared_type" => type, - } - end - end - private def load_field_types(path, types) diff --git a/gems/nil-kill/lib/nil_kill/languages/providers/typescript.rb b/gems/nil-kill/lib/nil_kill/languages/providers/typescript.rb index ebf84cd8f..c9afbe155 100644 --- a/gems/nil-kill/lib/nil_kill/languages/providers/typescript.rb +++ b/gems/nil-kill/lib/nil_kill/languages/providers/typescript.rb @@ -21,7 +21,7 @@ def extensions %w[.ts .tsx] end - def type_systems + def annotation_systems ["typescript"] end @@ -30,207 +30,7 @@ def runtime_tracing? end def notes - ["static TypeScript annotation evidence is supported; runtime tracing is not implemented"] - end - - def canonical_state_field(field, receiver: nil) - text = field.to_s - return text if text.empty? || text.start_with?("@") - - receiver_text = receiver.to_s.sub(/\A\*/, "") - if self_receiver_names.include?(receiver_text) || - self_receiver_names.any? { |name| receiver_text.start_with?("#{name}.") } - "@#{text}" - else - text - end - end - - def declared_state_field(field) - text = field.to_s - return text if text.empty? 
|| text.start_with?("@") - - "@#{text}" - end - - def type_definitions(document:, facts:, rel_path:, methods:, state_declarations:) - definitions = [] - Array(facts[:function_defs]).each do |fn| - typed = typescript_signature_types(fn.signature) - next if typed[:params].empty? && typed[:return_type].to_s.empty? - - definitions << { - "id" => ["typescript", rel_path, fn.owner, "method_signature", fn.name, fn.line, "typescript"].map(&:to_s).join("\u0000"), - "language" => "typescript", - "type_system" => "typescript", - "kind" => "method_signature", - "path" => rel_path, - "owner" => fn.owner.to_s, - "name" => fn.name.to_s, - "line" => fn.line, - "signature" => fn.signature.to_s, - "return_type" => typed[:return_type], - "params" => typed[:params], - } - end - - state_declarations.each do |state| - type = state.type.to_s - next if type.empty? - - field = declared_state_field(state.field) - definitions << { - "id" => ["typescript", rel_path, state.owner, "state_field", field, state.line, "typescript"].map(&:to_s).join("\u0000"), - "language" => "typescript", - "type_system" => "typescript", - "kind" => "state_field", - "path" => rel_path, - "owner" => state.owner.to_s, - "name" => field, - "line" => state.line, - "declared_type" => type, - } - end - - definitions.concat(typescript_interface_type_definitions(document, rel_path)) - definitions.concat(typescript_type_alias_definitions(document, rel_path)) - definitions - end - - private - - def self_receiver_names - %w[this] - end - - def typescript_signature_types(signature) - source = signature.to_s.strip - params_source, close_idx = extract_parenthesized(source) - return { params: [], return_type: nil } unless params_source - - params = NilKill.split_top_level(params_source).filter_map do |entry| - name, type = typescript_param_entry(entry) - next unless name && type - - { "name" => name, "type" => type } - end - - tail = source[(close_idx + 1)..].to_s - return_type = tail[/\A\s*:\s*([^={;]+)/, 1]&.strip - { params: 
params, return_type: return_type } - end - - def typescript_param_entry(entry) - text = entry.to_s.strip - return [nil, nil] if text.empty? - - text = text.sub(/\A(?:public|private|protected|readonly|override|declare)\s+/, "") - text = text.sub(/\A(?:public|private|protected)\s+readonly\s+/, "") - text = text.sub(/\A\.\.\./, "") - name, type = text.split(/:\s*/, 2) - return [nil, nil] unless name && type - - name = name.sub(/=.*/, "").sub(/\?\z/, "").strip - type = type.sub(/=.*/, "").strip - return [nil, nil] if name.empty? || type.empty? - - [name, type] - end - - def typescript_interface_type_definitions(document, rel_path) - definitions = [] - owner = nil - document.lines.each_with_index do |line, idx| - line_no = idx + 1 - stripped = line.strip - if (match = stripped.match(/\A(?:export\s+)?interface\s+([A-Za-z_$]\w*)\b/)) - owner = match[1] - next - end - - if owner && stripped.start_with?("}") - owner = nil - next - end - next unless owner - - if (match = stripped.match(/\A([A-Za-z_$]\w*)\??\s*\((.*)\)\s*:\s*([^;{]+)/)) - name = match[1] - params = NilKill.split_top_level(match[2]).filter_map do |entry| - param_name, type = typescript_param_entry(entry) - next unless param_name && type - - { "name" => param_name, "type" => type } - end - definitions << { - "id" => ["typescript", rel_path, owner, "method_signature", name, line_no, "typescript-interface"].map(&:to_s).join("\u0000"), - "language" => "typescript", - "type_system" => "typescript", - "kind" => "method_signature", - "path" => rel_path, - "owner" => owner, - "name" => name, - "line" => line_no, - "signature" => stripped.delete_suffix(";"), - "return_type" => match[3].strip, - "params" => params, - } - elsif (match = stripped.match(/\A([A-Za-z_$]\w*)\??\s*:\s*([^;{]+)/)) - name = match[1] - definitions << { - "id" => ["typescript", rel_path, owner, "state_field", name, line_no, "typescript-interface"].map(&:to_s).join("\u0000"), - "language" => "typescript", - "type_system" => "typescript", - "kind" 
=> "state_field", - "path" => rel_path, - "owner" => owner, - "name" => name, - "line" => line_no, - "declared_type" => match[2].strip, - } - end - end - definitions - end - - def typescript_type_alias_definitions(document, rel_path) - document.lines.each_with_index.filter_map do |line, idx| - stripped = line.strip - match = stripped.match(/\A(?:export\s+)?type\s+([A-Za-z_$]\w*)\s*=\s*(.+?)\s*;?\s*\z/) - next unless match - - { - "id" => ["typescript", rel_path, "", "type_alias", match[1], idx + 1, "typescript"].map(&:to_s).join("\u0000"), - "language" => "typescript", - "type_system" => "typescript", - "kind" => "type_alias", - "path" => rel_path, - "owner" => "", - "name" => match[1], - "line" => idx + 1, - "target" => match[2].strip, - "source" => "type", - } - end - end - - def extract_parenthesized(source) - start = source.index("(") - return [nil, nil] unless start - - depth = 0 - i = start - while i < source.length - case source[i] - when "(" - depth += 1 - when ")" - depth -= 1 - return [source[(start + 1)...i], i] if depth.zero? - end - i += 1 - end - [nil, nil] + ["annotation parsing is Tree-sitter static evidence; no TypeScript compiler backend is wired yet"] end end end diff --git a/gems/nil-kill/lib/nil_kill/static_evidence.rb b/gems/nil-kill/lib/nil_kill/static_evidence.rb index 244556af5..f531f5792 100644 --- a/gems/nil-kill/lib/nil_kill/static_evidence.rb +++ b/gems/nil-kill/lib/nil_kill/static_evidence.rb @@ -3,12 +3,15 @@ sibling_decomplex = File.expand_path("../../../decomplex/lib/decomplex", __dir__) if File.file?("#{sibling_decomplex}/source_filter.rb") && File.file?("#{sibling_decomplex}/syntax.rb") + require "#{sibling_decomplex}/ast" require "#{sibling_decomplex}/source_filter" require "#{sibling_decomplex}/syntax" else + require "decomplex/ast" require "decomplex/source_filter" require "decomplex/syntax" end +require_relative "decomplex_static_facts" module NilKill # Static, language-neutral evidence for Espalier. 
This intentionally avoids @@ -33,26 +36,21 @@ def build signatures = {} type_definitions = [] hash_shapes = [] + array_shapes = [] files = target_files files.each do |file| doc = Decomplex::Syntax.parse(file, parser: "tree_sitter") - provider = Languages.provider_for(doc.language) - facts = doc.adapter.structural_facts(doc) - rel_path = rel(file) - evidence = provider.static_evidence(document: doc, facts: facts, rel_path: rel_path) - methods.concat(evidence.fetch("methods", [])) - fields.concat(evidence.fetch("fields", [])) - state_types.merge!(evidence.fetch("state_types", {})) - merge_set_map!(state_protocols, evidence.fetch("state_protocols", {})) - merge_set_map!(state_param_origins, evidence.fetch("state_param_origins", {})) - signatures.merge!(evidence.fetch("signatures", {})) - type_definitions.concat(evidence.fetch("type_definitions", [])) - hash_shapes.concat(evidence.fetch("hash_shapes", [])) - end - languages_for(files).each do |language| - provider = Languages.provider_for(language) - type_definitions.concat(provider.external_type_definitions(root: @root)) + facts = doc.static_facts(root: @root) + methods.concat(facts.fetch(:methods, [])) + fields.concat(facts.fetch(:fields, [])) + state_types.merge!(facts.fetch(:state_types, {})) + merge_set_map!(state_protocols, facts.fetch(:state_protocols, {})) + merge_set_map!(state_param_origins, facts.fetch(:state_param_origins, {})) + signatures.merge!(facts.fetch(:signatures, {})) + type_definitions.concat(facts.fetch(:type_definitions, [])) + hash_shapes.concat(facts.fetch(:hash_shapes, [])) + array_shapes.concat(facts.fetch(:array_shapes, [])) end state_protocols = stringify_set_map(state_protocols) @@ -66,6 +64,9 @@ def build hash_shapes = hash_shapes.uniq do |shape| [shape["path"], shape["line"], Array(shape["keys"]), Array(shape["value_types"])] end + array_shapes = array_shapes.uniq do |shape| + [shape["path"], shape["line"], Array(shape["tuple_types"]), shape["size"]] + end { "version" => 2, @@ -88,6 
+89,7 @@ def build "type_definitions" => type_definitions.sort_by { |definition| [definition["path"].to_s, definition["owner"].to_s, definition["kind"].to_s, definition["name"].to_s] }, "alias_recommendations" => alias_recommendations, "hash_shapes" => hash_shapes.sort_by { |shape| [shape["path"].to_s, shape["line"].to_i, shape["keys"].to_s] }, + "array_shapes" => array_shapes.sort_by { |shape| [shape["path"].to_s, shape["line"].to_i, shape["tuple_types"].to_s] }, "ivar_runtime" => [], "ivar_protocols" => state_protocols, "ivar_param_origins" => state_param_origins, @@ -103,6 +105,7 @@ def build "type_definitions" => type_definitions.size, "alias_recommendations" => alias_recommendations.size, "hash_shapes" => hash_shapes.size, + "array_shapes" => array_shapes.size, "ivar_protocols" => state_protocols.size, "ivar_param_origins" => state_param_origins.size, }, diff --git a/gems/nil-kill/spec/alias_recommendations_spec.rb b/gems/nil-kill/spec/alias_recommendations_spec.rb index 8ed86610c..4ebeead28 100644 --- a/gems/nil-kill/spec/alias_recommendations_spec.rb +++ b/gems/nil-kill/spec/alias_recommendations_spec.rb @@ -70,11 +70,10 @@ end end -RSpec.describe NilKill::Languages::Providers::Ruby do - FakeDocument = Struct.new(:lines, keyword_init: true) +RSpec.describe Decomplex::NilKillStaticFacts do + FakeDocument = Struct.new(:language, :file, :lines, :root, keyword_init: true) - it "extracts Sorbet type aliases as static type definitions" do - provider = described_class.new + it "extracts Sorbet type aliases through the Decomplex extension" do document = FakeDocument.new(lines: [ "module Demo\n", " RawBody = T.type_alias { T::Array[AST::Node] }\n", @@ -82,15 +81,17 @@ " T.any(Schemas::EnumSchema, Schemas::StructSchema)\n", " end\n", "end\n", - ]) + ], language: :ruby, file: File.join(NilKill::ROOT, "src/demo.rb"), root: nil) - definitions = provider.type_definitions( - document: document, - facts: { function_defs: [] }, - rel_path: "src/demo.rb", - methods: [], - 
state_declarations: [] - ) + facts = described_class.build(document, { + function_defs: [], + state_declarations: [], + state_writes: [], + state_param_origins: [], + call_sites: [], + owner_defs: [], + }) + definitions = facts.fetch(:type_definitions) expect(definitions).to include( a_hash_including("kind" => "type_alias", "owner" => "Demo", "name" => "RawBody", diff --git a/gems/nil-kill/spec/multi_language_runtime_spec.rb b/gems/nil-kill/spec/multi_language_runtime_spec.rb index 4cc612300..e12035331 100644 --- a/gems/nil-kill/spec/multi_language_runtime_spec.rb +++ b/gems/nil-kill/spec/multi_language_runtime_spec.rb @@ -12,12 +12,12 @@ expect(ruby).to include("runtime_tracing" => true) expect(ruby["type_systems"]).to include("sorbet", "rbi") expect(python).to include("runtime_tracing" => true) - expect(python).to include("type_indexing" => true) - expect(python["type_systems"]).to include("python-typing") + expect(python).to include("type_indexing" => false) + expect(python["annotation_systems"]).to include("python-typing") expect(python.dig("runtime_capabilities", "params")).to be(true) expect(python.dig("runtime_capabilities", "line_coverage")).to be(true) - expect(typescript).to include("static_analysis" => true, "runtime_tracing" => false, "type_indexing" => true) - expect(typescript["type_systems"]).to include("typescript") + expect(typescript).to include("static_analysis" => true, "runtime_tracing" => false, "type_indexing" => false) + expect(typescript["annotation_systems"]).to include("typescript") expect(zig).to include("static_analysis" => true, "runtime_tracing" => false) expect(zig["notes"].join).to include("runtime tracing is not implemented") end @@ -39,25 +39,15 @@ }.to raise_error(NilKill::Languages::UnsupportedRuntimeTracer, /Zig/) end - it "canonicalizes Python instance fields through the language provider" do + it "does not expose static field policy through the language provider" do provider = NilKill::Languages.provider_for("python") - 
origin = Decomplex::Syntax::StateParamOrigin.new( - field: "items", - receiver: "self", - owner: "Worker", - param: "items", - file: "src/demo.py", - function: "__init__", - line: 2, - span: nil - ) - expect(provider.owned_state_origin?(origin, Set.new)).to be(true) - expect(provider.canonical_state_field("items", receiver: "self")).to eq("@items") - expect(provider.receiver_state_field("self.items", Set.new)).to eq("@items") + expect(provider).not_to respond_to(:canonical_state_field) + expect(provider).not_to respond_to(:owned_state_origin?) + expect(provider).not_to respond_to(:receiver_state_field) end - it "uses Python provider field policy when building Tree-sitter static evidence" do + it "uses the Decomplex extension for Python Tree-sitter static evidence" do grammar = ENV["DECOMPLEX_TS_PYTHON_PATH"] skip "set DECOMPLEX_TS_PYTHON_PATH to run Python Tree-sitter static evidence test" unless grammar && File.file?(grammar) @@ -119,7 +109,7 @@ def fetch(self, value: str | None) -> str | None: ... 
end end - it "uses TypeScript provider annotations when building Tree-sitter static evidence" do + it "uses the Decomplex extension for TypeScript Tree-sitter static evidence" do grammar = ENV["DECOMPLEX_TS_TYPESCRIPT_PATH"] skip "set DECOMPLEX_TS_TYPESCRIPT_PATH to run TypeScript Tree-sitter static evidence test" unless grammar && File.file?(grammar) @@ -181,7 +171,8 @@ class Worker { "name" => "name", "declared_type" => "string | null" )) - expect(evidence.dig("language_capabilities", "typescript", "type_indexing")).to be(true) + expect(evidence.dig("language_capabilities", "typescript", "type_indexing")).to be(false) + expect(evidence.dig("language_capabilities", "typescript", "annotation_systems")).to include("typescript") end end @@ -190,7 +181,8 @@ class Worker { languages = spec.fetch("language_capabilities").to_h { |cap| [cap.fetch("language"), cap] } expect(languages.fetch("python")).to include("runtime_tracing" => true) - expect(languages.fetch("typescript")).to include("type_indexing" => true) + expect(languages.fetch("typescript")).to include("type_indexing" => false) + expect(languages.fetch("typescript")["annotation_systems"]).to include("typescript") expect(languages.fetch("zig")).to include("runtime_tracing" => false) end From 2511c5af71667c946db227bc1717f71d09bc6b1c Mon Sep 17 00:00:00 2001 From: Brian Yahn Date: Fri, 19 Jun 2026 02:43:49 +0000 Subject: [PATCH 3/3] Add Lua static evidence support --- gems/decomplex/lib/decomplex/syntax.rb | 315 +++++++++++++++++- gems/decomplex/test/syntax_test.rb | 48 ++- .../lineage/docs/agents/cross-lang-support.md | 77 +++++ .../docs/agents/lang-support-quality.md | 208 ++++++++++++ gems/nil-kill/lib/nil_kill/cli.rb | 2 +- .../lib/nil_kill/decomplex_static_facts.rb | 70 +++- .../lib/nil_kill/hidden_enum_pressure.rb | 34 +- gems/nil-kill/lib/nil_kill/languages.rb | 1 + .../lib/nil_kill/languages/providers/lua.rb | 32 ++ .../spec/multi_language_runtime_spec.rb | 79 ++++- 10 files changed, 816 insertions(+), 50 
deletions(-) create mode 100644 gems/lineage/docs/agents/cross-lang-support.md create mode 100644 gems/lineage/docs/agents/lang-support-quality.md create mode 100644 gems/nil-kill/lib/nil_kill/languages/providers/lua.rb diff --git a/gems/decomplex/lib/decomplex/syntax.rb b/gems/decomplex/lib/decomplex/syntax.rb index 44ca62834..d727be174 100644 --- a/gems/decomplex/lib/decomplex/syntax.rb +++ b/gems/decomplex/lib/decomplex/syntax.rb @@ -170,6 +170,22 @@ def call_name?(source, names) /\Areturn\s+(?:null|true|false|0|1)\s*;?\z/ ].freeze ).freeze + LUA_LEXICON = LanguageLexicon.new( + nil_literal_patterns: [/\bnil\b/].freeze, + type_guard_patterns: [ + /\btype\s*\(/, + /\bnil\b/, + /\b(?:pcall|xpcall)\s*\(/ + ].freeze, + diagnostic_patterns: [ + /\berror\s*\(/, + /\bassert\s*\(/ + ].freeze, + trivial_patterns: [ + /\A(?:nil|true|false|0|1|break)\s*;?\z/, + /\Areturn\s+(?:nil|true|false|0|1)\s*;?\z/ + ].freeze + ).freeze LANGUAGE_LEXICONS = { ruby: RUBY_LEXICON, python: PYTHON_LEXICON, @@ -177,7 +193,8 @@ def call_name?(source, names) typescript: JAVASCRIPT_LEXICON, go: GO_LEXICON, rust: RUST_LEXICON, - zig: ZIG_LEXICON + zig: ZIG_LEXICON, + lua: LUA_LEXICON }.freeze module_function @@ -232,6 +249,7 @@ def language_for(file) when ".go" then :go when ".rs" then :rust when ".zig" then :zig + when ".lua" then :lua else :ruby end end @@ -239,7 +257,7 @@ def language_for(file) def supported_exts(parser: self.parser) case parser.to_s.tr("-", "_") when "", "tree_sitter", "treesitter" - %w[.rb .py .pyi .js .jsx .mjs .cjs .ts .tsx .go .rs .zig] + %w[.rb .py .pyi .js .jsx .mjs .cjs .ts .tsx .go .rs .zig .lua] else [] end @@ -387,7 +405,8 @@ class TreeSitterAdapter typescript: "tree-sitter-typescript", go: "tree-sitter-go", rust: "tree-sitter-rust", - zig: "@tree-sitter-grammars/tree-sitter-zig" + zig: "@tree-sitter-grammars/tree-sitter-zig", + lua: "@tree-sitter-grammars/tree-sitter-lua" }.freeze def parse(file, language: nil) @@ -654,10 +673,18 @@ def function_context(node, 
stack) end def function_owner_name(node, stack) - receiver_owner_name(node) || current_owner_from_stack(stack) + receiver_owner_name(node) || lua_assigned_function_owner_name(node) || + lua_function_owner_name(node) || current_owner_from_stack(stack) end def function_name(node) + if (name = lua_assigned_function_member_name(node)) + return name + end + if (name = lua_function_member_name(node)) + return name + end + case node.kind when "body_statement" hidden_ruby_method_name(node) @@ -675,6 +702,65 @@ def function_name(node) end end + def lua_assigned_function_member_name(node) + parts = lua_assigned_function_target_parts(node) + parts&.last + end + + def lua_assigned_function_owner_name(node) + parts = lua_assigned_function_target_parts(node) + return nil unless parts && parts.size > 1 + + parts[0...-1].join(".") + end + + def lua_assigned_function_target_parts(node) + target = lua_assigned_function_lhs(node) + return nil unless target + + parts = normalize_text(target.text).split(".").reject(&:empty?) + parts.size > 1 ? parts : nil + end + + def lua_assigned_function_lhs(node) + return nil unless lua_function_assignment_statement?(node) + + node.children.find { |child| child.kind == "variable_list" } + end + + def lua_function_assignment_statement?(node) + return false unless ts_node?(node) && node.kind == "assignment_statement" + + node.children.any? do |child| + child.kind == "expression_list" && child.text.to_s.lstrip.start_with?("function") + end + end + + def lua_function_member_name(node) + target = lua_function_target(node) + return nil unless target + + target.named_children.last&.text + end + + def lua_function_owner_name(node) + target = lua_function_target(node) + return nil unless target + + parts = target.named_children.map(&:text) + owner = parts[0...-1].join(".") + owner.empty? ? 
nil : owner + end + + def lua_function_target(node) + return nil unless ts_node?(node) && node.kind == "function_declaration" + + target = node.named_children.first + return nil unless target && %w[dot_index_expression method_index_expression].include?(target.kind) + + target + end + def function_kind(node, stack) return :method if owner_for_node(nil, node, stack: stack) @@ -691,7 +777,9 @@ def visibility_for(node) def function_params(node) return hidden_ruby_method_params(node) if hidden_ruby_method_definition?(node) - params = if node.kind == "method_declaration" + params = if lua_function_assignment_statement?(node) + node.children.find { |child| child.kind == "expression_list" }&.named_children&.find { |child| child.kind == "parameters" } + elsif node.kind == "method_declaration" node.named_children.select { |child| child.kind == "parameter_list" }[1] else named_field(node, "parameters") || @@ -924,7 +1012,12 @@ def record_call_site(document, node, stack, out) end def record_state_declaration(document, node, stack, out) - declaration = state_declaration(node) + if document.language == :python && node.kind == "block" + record_python_block_state_declarations(document, node, stack, out) + return + end + + declaration = state_declaration(document, node) return unless declaration out << StateDeclaration.new( @@ -937,6 +1030,26 @@ def record_state_declaration(document, node, stack, out) ) end + def record_python_block_state_declarations(document, node, stack, out) + python_block_typed_assignments(node).each do |lhs, type_node, _rhs| + target = state_target(lhs) + next unless target + + target = normalize_target_receiver(target, stack) + type = normalize_text(type_node.text) + next if type.empty? 
+ + out << StateDeclaration.new( + field: target[:field], + owner: current_owner(document, stack), + type: type, + file: document.file, + line: line(lhs), + span: span(lhs) + ) + end + end + def case_patterns(node) case_arms(node).flat_map do |child| case_arm_patterns(child).reject { |normalized| default_case_pattern?(normalized) } @@ -1128,6 +1241,14 @@ def record_state_write(document, node, stack, out) return if document.language == :ruby && node.kind == "operator_assignment" return if document.language == :ruby && assignment_lhs?(node) && next_sibling(node)&.text.to_s != "=" && !instance_variable_node?(node) + if document.language == :python && node.kind == "block" + record_python_block_state_writes(document, node, stack, out) + return + end + if document.language == :lua && node.kind == "block" + record_lua_block_state_writes(document, node, stack, out) + return + end lhs = if %w[assignment assignment_expression augmented_assignment assignment_statement operator_assignment].include?(node.kind) @@ -1155,6 +1276,46 @@ def record_state_write(document, node, stack, out) ) end + def record_lua_block_state_writes(document, node, stack, out) + lua_block_assignments(node).each do |lhs, _rhs| + target = state_target(lhs) + next unless target + + target = normalize_target_receiver(target, stack) + next if target[:field] == "[]" + + out << StateWrite.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: current_function(stack), + line: line(lhs), + span: span(lhs), + owner: current_owner(document, stack) + ) + end + end + + def record_python_block_state_writes(document, node, stack, out) + python_block_typed_assignments(node).each do |lhs, _type_node, _rhs| + target = state_target(lhs) + next unless target + + target = normalize_target_receiver(target, stack) + next if target[:field] == "[]" + + out << StateWrite.new( + field: target[:field], + receiver: target[:receiver], + file: document.file, + function: 
current_function(stack), + line: line(lhs), + span: span(lhs), + owner: current_owner(document, stack) + ) + end + end + def record_state_read(document, node, stack, out) target = state_read_target(node) return unless target @@ -1172,6 +1333,15 @@ def record_state_read(document, node, stack, out) end def record_state_param_origin(document, node, stack, out) + if document.language == :python && node.kind == "block" + record_python_block_state_param_origins(document, node, stack, out) + return + end + if document.language == :lua && node.kind == "block" + record_lua_block_state_param_origins(document, node, stack, out) + return + end + lhs = nil rhs = nil if %w[assignment assignment_expression augmented_assignment assignment_statement].include?(node.kind) @@ -1204,6 +1374,82 @@ def record_state_param_origin(document, node, stack, out) end end + def record_python_block_state_param_origins(document, node, stack, out) + params = current_params(stack) + return if params.empty? + + python_block_typed_assignments(node).each do |lhs, _type_node, rhs| + target = state_target(lhs) + next unless target && rhs + + target = normalize_target_receiver(target, stack) + rhs_param_names(rhs, params).each do |param| + out << StateParamOrigin.new( + field: target[:field], + receiver: target[:receiver], + owner: current_owner(document, stack), + param: param, + file: document.file, + function: current_function(stack), + line: line(lhs), + span: span(lhs) + ) + end + end + end + + def record_lua_block_state_param_origins(document, node, stack, out) + params = current_params(stack) + return if params.empty? 
+ + lua_block_assignments(node).each do |lhs, rhs| + target = state_target(lhs) + next unless target && rhs + + target = normalize_target_receiver(target, stack) + rhs_param_names(rhs, params).each do |param| + out << StateParamOrigin.new( + field: target[:field], + receiver: target[:receiver], + owner: current_owner(document, stack), + param: param, + file: document.file, + function: current_function(stack), + line: line(lhs), + span: span(lhs) + ) + end + end + end + + def lua_block_assignments(node) + out = [] + children = Array(node.children) + children.each_cons(3) do |lhs, op, rhs| + next unless lhs.kind == "variable_list" + next unless op.text.to_s == "=" + next unless rhs.kind == "expression_list" + + out << [lhs, rhs] + end + out + end + + def python_block_typed_assignments(node) + out = [] + children = Array(node.children) + children.each_cons(5) do |lhs, colon, type_node, equals, rhs| + next unless state_target(lhs) + next unless colon.text.to_s == ":" + next unless equals.text.to_s == "=" + next unless ts_node?(type_node) && type_node.kind == "type" + next unless ts_node?(rhs) + + out << [lhs, type_node, rhs] + end + out + end + def record_branch_decision(document, node, stack, out, immutable_readers:, immutable_reader_types:, type_aliases:, method_param_types:) return unless branch_node?(node) @@ -1646,10 +1892,10 @@ def call_target(document, node) ruby_bare_body_call_target(document, node) when "identifier" ruby_bare_call_target(document, node) - when "call_expression", "method_invocation", "invocation_expression" + when "call_expression", "function_call", "method_invocation", "invocation_expression" generic_call_target(node) when "attribute", "selector_expression", "field", "member_expression", - "field_expression", "expression_list" + "field_expression", "expression_list", "dot_index_expression", "method_index_expression" adjacent_argument_call_target(node) end end @@ -1780,10 +2026,11 @@ def noise_call?(target) false end - def 
state_declaration(node) + def state_declaration(document, node) case node.kind - when "assignment" - ruby_t_let_state_declaration(node) + when "assignment", "assignment_expression", "assignment_statement" + ruby_t_let_state_declaration(node) || + (document.language == :lua ? assignment_state_declaration(node) : nil) when "container_field" zig_container_field_declaration(node) when "property_declaration", "public_field_definition", "field_definition", "field_declaration" @@ -1808,6 +2055,31 @@ def generic_field_declaration(node) { field: name.text, type: declared_type_text(node, name) } end + def assignment_state_declaration(node) + lhs = named_field(node, "left") || node.named_children.first + rhs = named_field(node, "right") || named_field(node, "value") || node.named_children[1] + target = state_target(lhs) + return nil unless target + return nil unless %w[self this].include?(target[:receiver].to_s) + + type = inferred_assignment_type(rhs) + return nil unless type + + { field: target[:field], type: type } + end + + def inferred_assignment_type(node) + return nil unless ts_node?(node) + + text = normalize_text(node.text) + return "table" if node.kind.to_s == "table_constructor" || text.start_with?("{") + return "string" if text.match?(/\A["']/) + return "number" if text.match?(/\A[-+]?\d+(?:\.\d+)?\z/) + return "boolean" if %w[true false].include?(text) + + nil + end + def declared_type_text(node, name_node) text = node.text.to_s after_name = text[(name_node.end_byte - node.start_byte)..].to_s @@ -1832,11 +2104,12 @@ def state_read_target(node) { receiver: normalize_text(receiver.text), field: method.text } when "field", "selector_expression", "member_expression", "attribute", - "field_expression", "expression_list" + "field_expression", "expression_list", "dot_index_expression", "method_index_expression" return nil if node.kind == "expression_list" && !(named_field(node, "operand") && named_field(node, "field")) object = named_field(node, "object") || 
named_field(node, "receiver") || - named_field(node, "operand") || named_field(node, "value") + named_field(node, "operand") || named_field(node, "value") || + node.named_children.first field = named_field(node, "field") || named_field(node, "property") || node.named_children.last if node.kind == "field_expression" && node.text.to_s.start_with?(".") field = node.named_children.find { |child| child.kind == "identifier" } || field @@ -1857,6 +2130,8 @@ def state_target(lhs) return nil if prev_sibling(lhs)&.text == ":" case lhs.kind + when "variable_list" + state_target(lhs.named_children.first) || dotted_state_target(lhs.text) when "call" receiver = named_field(lhs, "receiver") method = named_field(lhs, "method") @@ -1864,13 +2139,14 @@ def state_target(lhs) { receiver: normalize_text(receiver.text), field: method.text.sub(/=\z/, "") } when "field", "selector_expression", "member_expression", "attribute", - "field_expression", "expression_list" + "field_expression", "expression_list", "dot_index_expression", "method_index_expression" if lhs.kind == "expression_list" && !(named_field(lhs, "operand") && named_field(lhs, "field")) return state_target(lhs.named_children.first) end object = named_field(lhs, "object") || named_field(lhs, "receiver") || - named_field(lhs, "operand") || named_field(lhs, "value") + named_field(lhs, "operand") || named_field(lhs, "value") || + lhs.named_children.first field = named_field(lhs, "field") || named_field(lhs, "property") || lhs.named_children.last if lhs.kind == "field_expression" && lhs.text.to_s.start_with?(".") field = lhs.named_children.find { |child| child.kind == "identifier" } || field @@ -1884,6 +2160,13 @@ def state_target(lhs) end end + def dotted_state_target(text) + parts = normalize_text(text).split(".").reject(&:empty?) 
+ return nil unless parts.size > 1 + + { receiver: parts[0...-1].join("."), field: parts[-1] } + end + def hidden_match?(node) node.kind == "expression_statement" && first_token_kind(node) == "match" && @@ -1935,7 +2218,7 @@ def parent_node(node) def field_like_node?(node) %w[field selector_expression member_expression attribute field_expression - expression_list scoped_identifier].include?(node.kind) + expression_list scoped_identifier dot_index_expression method_index_expression].include?(node.kind) end def normalize_type_owner(text) diff --git a/gems/decomplex/test/syntax_test.rb b/gems/decomplex/test/syntax_test.rb index 5531ea77b..29527a66d 100644 --- a/gems/decomplex/test/syntax_test.rb +++ b/gems/decomplex/test/syntax_test.rb @@ -245,14 +245,16 @@ def test_tree_sitter_python_adapter_extracts_hidden_assignment_and_call_facts with_file(<<~PY, ".py") do |path| class Worker: - def __init__(self, items): - self.items = items + def __init__(self, items: list[str]): + self.items: list[str] = items def call(self): self.items.append("x") PY doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :python) + assert_includes doc.state_declarations.map { |decl| [decl.owner, decl.field, decl.type] }, + ["Worker", "items", "list[str]"] assert_includes doc.state_writes.map { |write| [write.receiver, write.field] }, ["self", "items"] assert_includes doc.state_param_origins.map { |origin| [origin.owner, origin.function, origin.receiver, origin.field, origin.param] }, ["Worker", "__init__", "self", "items", "items"] @@ -261,6 +263,48 @@ def call(self): end end + def test_tree_sitter_lua_adapter_extracts_structural_facts_when_grammar_is_available + grammar = ENV["DECOMPLEX_TS_LUA_PATH"] + skip "set DECOMPLEX_TS_LUA_PATH to run Lua structural facts test" unless grammar && File.file?(grammar) + + with_file(<<~LUA, ".lua") do |path| + local Worker = {} + + function Worker.new(items) + local self = { items = items, count = 0 } + return self + end + + function 
Worker:call(value) + self.items[#self.items + 1] = value + self.client:fetch(value) + return { value = value, ok = true } + end + + Worker.run = function(self, job) + self.status = job + self:call(job) + end + LUA + doc = Decomplex::Syntax.parse(path, parser: "tree_sitter", language: :lua) + + assert_equal :lua, doc.language + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Worker", "new"] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Worker", "call"] + assert_includes doc.function_defs.map { |fn| [fn.owner, fn.name] }, ["Worker", "run"] + assert_includes doc.state_writes.map { |write| [write.owner, write.function, write.receiver, write.field] }, + ["Worker", "call", "self", "items"] + assert_includes doc.state_writes.map { |write| [write.owner, write.function, write.receiver, write.field] }, + ["Worker", "run", "self", "status"] + assert_includes doc.state_param_origins.map { |origin| [origin.owner, origin.function, origin.receiver, origin.field, origin.param] }, + ["Worker", "call", "self", "items", "value"] + assert_includes doc.state_param_origins.map { |origin| [origin.owner, origin.function, origin.receiver, origin.field, origin.param] }, + ["Worker", "run", "self", "status", "job"] + assert_includes doc.call_sites.map { |call| [call.owner, call.function, call.receiver, call.message] }, + ["Worker", "call", "self.client", "fetch"] + end + end + def test_tree_sitter_zig_adapter_extracts_structural_facts_when_grammar_is_available grammar = ENV["DECOMPLEX_TS_ZIG_PATH"] skip "set DECOMPLEX_TS_ZIG_PATH to run Zig structural facts test" unless grammar && File.file?(grammar) diff --git a/gems/lineage/docs/agents/cross-lang-support.md b/gems/lineage/docs/agents/cross-lang-support.md new file mode 100644 index 000000000..1e43ff3ac --- /dev/null +++ b/gems/lineage/docs/agents/cross-lang-support.md @@ -0,0 +1,77 @@ +# Cross-Language Support Validation + +This document tracks the first practical validation pass for building 
Lineage databases from non-CLEAR repositories and ingesting analyzer, lint, coverage, hazard, and runtime evidence. + +`gems/lineage/docs/agents/plugins.md` describes the plugin architecture and broad language targets. It does not prescribe exact repositories, so this pass used representative active OSS projects with enough real code to exercise the adapters. + +## Goal + +Create one `lineage.db` per target repository, ingest the best available evidence, start a Lineage UI server for each on `0.0.0.0`, and spot check that the UI can review the project with cross-language data. + +## Validation Matrix + +| Language | Repository | Local Clone | Database | UI Port | Status | +| --- | --- | --- | --- | --- | --- | +| Python | `https://github.com/Textualize/rich` | `/tmp/lineage-rich` | `/tmp/lineage-rich/lineage.db` | `8081` | Complete | +| TypeScript | `https://github.com/colinhacks/zod` | `/tmp/lineage-zod` | `/tmp/lineage-zod/lineage.db` | `8082` | Complete | +| Go | `https://github.com/junegunn/fzf` | `/tmp/lineage-fzf` | `/tmp/lineage-fzf/lineage.db` | `8083` | Complete | +| Lua | `https://github.com/luarocks/luarocks` | `/tmp/lineage-lua-luarocks` | `/tmp/lineage-lua-luarocks/lineage.db` | `8084` | Complete, no coverage | +| C | `https://github.com/libuv/libuv` | `/tmp/lineage-c-libuv` | `/tmp/lineage-c-libuv/lineage.db` | `8085` | Complete, no coverage | +| C++ | `https://github.com/fmtlib/fmt` | `/tmp/lineage-cpp-fmt` | `/tmp/lineage-cpp-fmt/lineage.db` | `8086` | Complete, no coverage | +| C# | `https://github.com/serilog/serilog` | `/tmp/lineage-csharp-serilog` | `/tmp/lineage-csharp-serilog/lineage.db` | `8087` | Complete, no coverage | +| Java | `https://github.com/google/gson` | `/tmp/lineage-java-gson` | `/tmp/lineage-java-gson/lineage.db` | `8088` | Complete, no coverage | +| Swift | `https://github.com/apple/swift-argument-parser` | `/tmp/lineage-swift-argument-parser` | `/tmp/lineage-swift-argument-parser/lineage.db` | `8089` | Complete, no coverage | 
+| Kotlin | `https://github.com/square/okio` | `/tmp/lineage-kotlin-okio` | `/tmp/lineage-kotlin-okio/lineage.db` | `8090` | Complete, no coverage | + +All UI servers were restarted with detached sessions and smoke checked through `curl` on ports `8081` through `8090`. + +## Evidence Targets + +Each repository received as much of this evidence as the current tools could produce without repository-specific hacks: + +- `lineage build`: Git history, logical units, churn, and ownership. +- Decomplex SARIF: structural complexity findings. +- SlopCop SARIF: coverage gaps and constraint findings. +- Boobytrap SARIF: bug-risk findings derived from churn, complexity, and coverage. +- Nil-kill SARIF: optionality, union, hidden enum, and primitive pressure findings where the language adapter supports them. +- Espalier SARIF: architectural pressure findings where the language adapter supports them. +- Lint SARIF: native lint output converted or emitted as SARIF where the repository already had a reasonable local toolchain. +- Coverage: native coverage output ingested through Lineage-supported formats when the toolchain was available. +- Runtime traces: Sentry-style stack trace ingestion for Python smoke coverage. +- Hazards: Go concurrency hazards for `fzf`. 
+ +## Current Counts + +| Language | Logical Units | SARIF Artifacts | SARIF Findings | Quality Events | Coverage Line Events | Hazards | Runtime Events | +| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: | +| Python / Rich | 2,152 | 6 | 6,270 | 1,022 | 7,792 | 0 | 1 | +| TypeScript / Zod | 2,437 | 6 | 8,112 | 1,365 | 8,908 | 0 | 0 | +| Go / fzf | 1,421 | 7 | 13,316 | 608 | 16,422 | 312 | 0 | +| Lua / LuaRocks | 1,043 | 6 | 5,056 | 0 | 0 | 0 | 0 | +| C / libuv | 3,920 | 6 | 21,895 | 0 | 0 | 0 | 0 | +| C++ / fmt | 6,014 | 6 | 2,982 | 0 | 0 | 0 | 0 | +| C# / Serilog | 615 | 6 | 1,281 | 0 | 0 | 0 | 0 | +| Java / Gson | 4,921 | 6 | 2,624 | 0 | 0 | 0 | 0 | +| Swift / Argument Parser | 1,938 | 6 | 835 | 0 | 0 | 0 | 0 | +| Kotlin / Okio | 3,357 | 6 | 1,900 | 0 | 0 | 0 | 0 | + +## Adapter Work Completed + +- Replaced generic language placeholders with explicit Decomplex lexicons for Lua, C, C++, C#, Java, Swift, and Kotlin. +- Added real Tree-sitter syntax support and tests for C, C++, C#, Java, Swift, and Kotlin structural facts. +- Added Swift member access and `switch_entry` support. +- Added Kotlin `when_expression` and `when_entry` support. +- Added grammar candidate support for packages that ship `tree_sitter_*_binding.node`, needed by `tree-sitter-kotlin`. +- Added Go concurrency hazard detection through SlopCop/Lineage. +- Fixed Lineage source extraction and coverage ingestion issues found during TypeScript/Go validation. +- Fixed Nil-kill static-only normalization so non-Ruby languages do not accidentally depend on stale runtime traces. +- Replaced Lineage regex-first logical-unit extraction for Ruby, Python, JavaScript/TypeScript, Go, Rust, Zig, C/C++, and C# with Tree-sitter-backed extraction. The regex heuristic path is now only for secondary experimental languages. + +## Environment Gaps + +- Lua coverage/lint was limited by missing local LuaRocks/Busted tooling. 
+- C and C++ coverage was not generated in this pass; static analyzer, syntax lint, and SARIF ingestion were validated. +- C#, Java, Swift, and Kotlin native build/lint/coverage were limited by missing `dotnet`, Java, Swift, and Kotlin toolchains in this environment. +- TypeScript and Go runtime tracing are still out of scope for this pass. + +These are environment/toolchain gaps, not Lineage ingestion blockers. The DBs and UIs exist for all requested languages. diff --git a/gems/lineage/docs/agents/lang-support-quality.md b/gems/lineage/docs/agents/lang-support-quality.md new file mode 100644 index 000000000..69483ffc6 --- /dev/null +++ b/gems/lineage/docs/agents/lang-support-quality.md @@ -0,0 +1,208 @@ +# Multi-Language Support Quality Pass + +This pass spot checked the validation DBs created for Python, TypeScript, Go, Lua, C, C++, C#, Java, Swift, and Kotlin. The goal was not to prove feature parity with Ruby, but to verify that Lineage can ingest and display useful SARIF/coverage/risk evidence for each language, and to fix clear cross-language false positives found during review. + +## Quality Checklist + +- Lineage DB exists and UI serves the repository. +- SARIF artifacts ingest into `sarif_findings` with stable paths and line anchors. +- Decomplex findings include enough detector-specific context to be actionable. +- Nil-kill static pressure findings do not flag obviously typed or non-null constructs as loose contracts. +- SlopCop and Boobytrap produce useful output when coverage/churn exists, and degrade clearly when coverage is absent. +- Espalier emits architecture facts where class/function ownership extraction is mature. +- Native lint SARIF is ingested when the local toolchain can produce it. +- Runtime or hazard evidence is present for languages where support currently exists. + +## Fixes From This Pass + +- Decomplex SARIF messages now include detector-specific payloads for the major findings. 
For example, Rich `console.py` now shows `Derived-State Staleness: max_height derived from size at line 995; size reassigned at line 996 but max_height is not recomputed` instead of only naming the method. +- Decomplex suppresses generated Lua/Teal `_tl_compat` compatibility prelude branches. LuaRocks no longer reports line-1 generated prelude missing-abstraction findings. +- Decomplex extracts Go `name type` struct field declarations, so fields like `I16 []int16` keep their type. +- Nil-kill no longer treats Python `-> None` as nullable pressure by itself. `str | None`, `None | str`, `Optional[...]`, `null`, and `undefined` still count. +- Nil-kill Go static evidence now preserves typed struct fields through to SARIF; fzf no longer reports `Slab#I16` as an untyped field. + +Regression tests added: + +- `gems/decomplex/test/syntax_test.rb`: Lua generated prelude suppression and Go name-type struct fields. +- `gems/decomplex/test/report_test.rb`: actionable SARIF messages for derived-state staleness and broken protocols. +- `gems/nil-kill/spec/multi_language_runtime_spec.rb`: Python `-> None` nullable handling and Go typed struct field evidence. + +## Current Validation DBs + +All UI servers responded with HTTP 200 on ports `8081` through `8090` after SARIF reingest and UI summary refresh. 
+ +| Language | Repo | Port | Logical Units | SARIF Artifacts | SARIF Findings | Coverage Lines | Quality Events | +| --- | --- | ---: | ---: | ---: | ---: | ---: | ---: | +| Python | Rich | 8081 | 2,152 | 6 | 6,905 | 7,792 | 1,022 | +| TypeScript | Zod | 8082 | 2,437 | 6 | 8,246 | 8,908 | 1,365 | +| Go | fzf | 8083 | 1,421 | 7 | 13,219 | 16,422 | 608 | +| Lua | LuaRocks | 8084 | 1,043 | 6 | 5,731 | 0 | 0 | +| C | libuv | 8085 | 3,920 | 6 | 30,310 | 0 | 0 | +| C++ | fmt | 8086 | 6,014 | 6 | 5,120 | 0 | 0 | +| C# | Serilog | 8087 | 615 | 6 | 1,524 | 0 | 0 | +| Java | Gson | 8088 | 4,921 | 6 | 3,542 | 0 | 0 | +| Swift | Argument Parser | 8089 | 1,938 | 6 | 1,129 | 0 | 0 | +| Kotlin | Okio | 8090 | 3,357 | 6 | 2,243 | 0 | 0 | + +Swift and Kotlin SARIF reingest skipped two non-SARIF JSON evidence files in each `tmp/lineage-sarif` directory. That is expected because the ingest command accepts directories and ignores JSON files that are not SARIF documents. + +## Language Findings + +### Python / Rich + +Status: good. + +The strongest path is covered: Lineage DB, Decomplex, Nil-kill, Espalier, SlopCop, Boobytrap, native lint, coverage, quality events, and one runtime stack-trace smoke event all ingest. Rich is the best multi-language validation target after CLEAR Ruby because it has meaningful Python type annotations and coverage. + +Spot checks: + +- Decomplex state-branch and derived-state findings now include state refs, predicates, and stale variable/source details. +- Nil-kill nullable signatures now avoid false positives for plain `-> None`, while still flagging real nullable params/returns. +- SlopCop and Boobytrap findings are anchored to real coverage/churn data. +- Native lint SARIF from Black is visible and path-anchored. + +Remaining caveat: test/example files are included in the validation DB. That is useful for ingestion coverage, but production review should use source-role filtering in Lineage. 
+ +### TypeScript / Zod + +Status: good, with test-file noise. + +TypeScript SARIF ingestion, coverage, Decomplex, Nil-kill, Espalier, SlopCop, and Boobytrap all produce anchored findings. Decomplex points at real large schema/parser functions and TypeScript annotations feed Nil-kill static pressure. + +Spot checks: + +- Decomplex state-branch density on Zod parser paths includes concrete `_def`/schema refs and predicates. +- Nil-kill flags `unknown`/`any`-style slots without requiring runtime tracing. +- SlopCop and Boobytrap coverage/churn rows ingest correctly. + +Remaining caveat: broken-protocol and Boobytrap rows in test suites are noisy. This is mostly a source-role/ranking issue, not a TypeScript parser failure. + +### Go / fzf + +Status: good. + +Go has the best non-Ruby systems-language story in this pass. Lineage ingests coverage, SlopCop coverage gaps, Boobytrap risk, Decomplex, Nil-kill static facts, and Go concurrency hazard SARIF. + +Spot checks: + +- SlopCop Go constraint SARIF flags channel and lock/sync hazards lacking race coverage. +- Decomplex identifies large terminal/control-flow functions with convergence across several detectors. +- Nil-kill now preserves typed struct fields such as `Slab.I16 []int16`, removing a concrete false positive. + +Remaining caveat: Go hazard support is currently concurrency-focused. Other safety categories need explicit language rules if we want broader Go systems checks. + +### Lua / LuaRocks + +Status: usable static ingestion, experimental analysis quality. + +Lineage DB and SARIF ingestion work. Decomplex produces useful Lua findings after generated Teal prelude suppression. Nil-kill and Espalier are sparse, which matches the current maturity of Lua ownership/type extraction. + +Spot checks: + +- Generated `_tl_compat` prelude line-1 missing-abstraction findings are gone. +- Real Lua findings remain, e.g. repeated guard tuples and state-branch predicates. 
+- SlopCop/Boobytrap rows exist but are static/no-coverage quality because no Lua coverage was available. + +Remaining caveat: Lua needs better function ownership and module/type conventions before Espalier and Nil-kill can be more than light static signals. + +### C / libuv + +Status: strong SARIF ingestion, experimental analysis quality. + +Lineage handles the large libuv DB and ingests Decomplex, SlopCop, Boobytrap, Nil-kill, Espalier, and syntax-lint SARIF. Decomplex results are plentiful and anchored. + +Spot checks: + +- Decomplex state-branch density points at real C state/predicate-heavy files like `src/win/pipe.c`. +- SlopCop/Boobytrap can rank paths, but no coverage was generated in this environment. +- Native syntax lint catches environment/header availability issues. Those are useful as toolchain diagnostics, not code-quality verdicts. + +Remaining caveat: C has no coverage here, and C header/platform conditionals create noisy lint results unless the native build environment is configured. + +### C++ / fmt + +Status: strong SARIF ingestion, experimental analysis quality. + +Lineage ingests fmt SARIF and the UI handles template-heavy headers. Decomplex and Nil-kill produce anchored findings; Espalier has limited but nonzero ownership extraction. + +Spot checks: + +- Decomplex findings are anchored in headers and bundled tests. +- Nil-kill nullable findings around pointer/time APIs are plausible. +- Native C++ syntax lint found module/toolchain issues. + +Remaining caveat: bundled third-party/test code is included, so production review needs source-role filtering. C++ templates and macros need more language-specific tuning before high confidence architecture claims. + +### C# / Serilog + +Status: usable static ingestion, moderate Decomplex signal. + +SARIF ingestion works and Decomplex points at real branch-heavy formatting/parsing code. Nil-kill nullable signature findings map well to C# nullable-style APIs. 
+ +Spot checks: + +- Decomplex state-branch findings include concrete property names and predicates. +- Nil-kill nullable signature findings are plausible in Serilog configuration APIs. +- SlopCop/Boobytrap are static/no-coverage quality because coverage was unavailable. + +Remaining caveat: Espalier emitted no findings in this validation pass, so C# architecture extraction needs more work before it can be relied on. + +### Java / Gson + +Status: usable static ingestion, moderate Decomplex/Espalier signal. + +Java SARIF ingestion works. Decomplex, Nil-kill, Espalier, SlopCop, and Boobytrap all produce anchored findings, with Decomplex pointing at real parser/adapter complexity. + +Spot checks: + +- Decomplex state-branch density in `TypeAdapters` and `JsonReader` has meaningful refs/predicates. +- Espalier emits read-only function facts for immutable-style value methods. +- Nil-kill untyped fields/methods are plausible where generic/reflection-heavy Java code defeats simple extraction. + +Remaining caveat: no Java coverage or native lint was available in this environment, so risk ranking lacks coverage-backed confidence. + +### Swift / Argument Parser + +Status: usable static ingestion, experimental analysis quality. + +Lineage DB and SARIF ingestion work. Decomplex and Espalier produce anchored Swift findings; Nil-kill static evidence ingests. SlopCop is empty because no coverage was generated. + +Spot checks: + +- Decomplex state-branch density in completion generation includes Swift option/subcommand predicates. +- Espalier has limited read-only function extraction. +- Nil-kill static untyped signatures are present where generic Swift inference is not yet mature. + +Remaining caveat: Swift needs coverage ingestion and better function/owner extraction before architecture metrics should be treated as strong signal. + +### Kotlin / Okio + +Status: usable static ingestion, moderate Decomplex signal. + +Kotlin DB and SARIF ingestion work. 
Decomplex has useful findings in buffer/filesystem code, and Espalier emits a small set of function facts. Nil-kill static findings are anchored. + +Spot checks: + +- Decomplex state-branch density in `Buffer.kt` includes concrete buffer/segment refs and predicates. +- Espalier identifies some read-only/impure functions. +- Nil-kill untyped signatures point at equality/select APIs where extraction needs stronger Kotlin typing rules. + +Remaining caveat: no coverage was generated, SlopCop is empty, and Kotlin parser extraction needs more language-specific tuning before architecture metrics are high confidence. + +## Cross-Cutting Assessment + +The common ingestion path is solid: all ten DBs load, SARIF artifacts persist, UI summaries refresh, and servers respond. Decomplex is the most broadly useful analyzer across all languages because Tree-sitter extraction gives it enough syntax to anchor complexity findings. + +The biggest quality divider is coverage. Python, TypeScript, and Go have coverage-backed SlopCop/Boobytrap signal; the other seven languages currently have static-only or churn-only risk, which should be presented as lower confidence. + +Nil-kill is useful for Python, TypeScript, Go, C#, Java, Swift, and Kotlin static pressure, but language-specific type extraction determines signal quality. The Go struct-field and Python `-> None` fixes show the right pattern: false positives should be fixed in the shared syntax/provider layer with regression tests, not tuned per repository. + +Espalier is useful where class/function ownership extraction is mature. It is sparse for Lua, C, C#, and Swift/Kotlin compared with Ruby/TypeScript/Go/Java. Treat missing Espalier signal in those languages as adapter immaturity, not proof of good architecture. + +## Recommended Next Work + +- Add source-role filtering in Lineage views and ranking so `src`/production findings can be reviewed separately from tests, examples, vendored code, and generated code. 
+- Add explicit generated/vendor detection to the shared source filter for common language artifacts. +- Improve C/C++ native build-aware lint/coverage collection; static parser output alone is not enough for high-confidence systems-language review. +- Add coverage ingestion recipes for Lua, C#, Java, Swift, and Kotlin validation repos. +- Continue adding language-specific ownership/type extraction only when a spot check finds a concrete false positive or missing high-value signal. diff --git a/gems/nil-kill/lib/nil_kill/cli.rb b/gems/nil-kill/lib/nil_kill/cli.rb index b588031f4..4932379e9 100644 --- a/gems/nil-kill/lib/nil_kill/cli.rb +++ b/gems/nil-kill/lib/nil_kill/cli.rb @@ -290,7 +290,7 @@ def help bundle exec tools/nil-kill collect --instrument-source -- bundle exec tools/nil-kill collect --no-instrument-source -- bundle exec tools/nil-kill infer [--no-sorbet] - bundle exec tools/nil-kill static [--root DIR] [--language ruby|python|typescript|rust|zig] [--output static.json] [targets...] + bundle exec tools/nil-kill static [--root DIR] [--language ruby|python|typescript|lua|rust|zig] [--output static.json] [targets...] 
bundle exec tools/nil-kill collect-runtime --language python [--target src] [--output traces/] -- bundle exec tools/nil-kill collect-python [--root DIR] [--target src] [--output traces/] -- bundle exec tools/nil-kill normalize [--root DIR] --static static.json [--traces traces/] [--output evidence.json] diff --git a/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb b/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb index 3b6f61895..685b4ffda 100644 --- a/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb +++ b/gems/nil-kill/lib/nil_kill/decomplex_static_facts.rb @@ -18,6 +18,7 @@ def initialize(document, structural_facts, root: NilKill::ROOT) @facts = structural_facts @language = document.language.to_s @root = root + @ts_node_cache = {} end def build @@ -175,7 +176,7 @@ def extra_typed_state_declarations walk_tree(@document.root) do |node| next unless %w[assignment assignment_expression assignment_statement].include?(node.kind.to_s) - lhs = named_child(node, "left") || node.named_children.first + lhs = named_child(node, "left") || node_named_children(node).first target = state_target(lhs) next unless target @@ -752,8 +753,9 @@ def state_target(node) { receiver: node_text(receiver), field: node_text(method).sub(/=\z/, "") } when "field", "selector_expression", "member_expression", "attribute", "field_expression", "expression_list" object = named_child(node, "object") || named_child(node, "receiver") || - named_child(node, "operand") || named_child(node, "value") - field = named_child(node, "field") || named_child(node, "property") || node.named_children.last + named_child(node, "operand") || named_child(node, "value") || + node_named_children(node).first + field = named_child(node, "field") || named_child(node, "property") || node_named_children(node).last return nil unless object && field { receiver: node_text(object), field: node_text(field).sub(/=\z/, "") } @@ -774,23 +776,23 @@ def declared_type_text(node, name_node) end def hash_literal_node?(node) - 
%w[hash dictionary object map literal_value].include?(node.kind.to_s) || + %w[hash dictionary object map literal_value table_constructor].include?(node.kind.to_s) || (node_text(node).start_with?("{") && node_text(node).end_with?("}") && hash_pair_nodes(node).any?) end def hash_pair_nodes(node) - Array(node.named_children).select do |child| - %w[pair hash_pair pair_pattern keyed_element field_initializer].include?(child.kind.to_s) || + node_named_children(node).select do |child| + %w[pair hash_pair pair_pattern keyed_element field field_initializer].include?(child.kind.to_s) || named_child(child, "key") end end def hash_pair_key(pair) - named_child(pair, "key") || pair.named_children.first + named_child(pair, "key") || node_named_children(pair).first end def hash_pair_value(pair) - named_child(pair, "value") || named_child(pair, "field") || pair.named_children[1] + named_child(pair, "value") || named_child(pair, "field") || node_named_children(pair)[1] end def hash_key_name(node) @@ -809,7 +811,7 @@ def array_literal_node?(node) end def array_elements(node) - Array(node.named_children).reject { |child| %w[comment].include?(child.kind.to_s) } + node_named_children(node).reject { |child| %w[comment].include?(child.kind.to_s) } end def method_owner(fn) @@ -963,17 +965,17 @@ def constant_literal_types def constant_assignment(node) if %w[assignment assignment_expression assignment_statement].include?(node.kind.to_s) - target = named_child(node, "left") || node.named_children.first + target = named_child(node, "left") || node_named_children(node).first return [nil, nil] unless target && target.kind.to_s == "constant" - return [node_text(target), named_child(node, "right") || node.named_children[1]] + return [node_text(target), named_child(node, "right") || node_named_children(node)[1]] end match = node_text(node).match(/\A([A-Z]\w*)\s*=\s*(.+)\z/m) return [nil, nil] unless match - value = node.named_children.drop(1).find { |child| node_text(child) == match[2].strip } || - 
node.named_children[1] + value = node_named_children(node).drop(1).find { |child| node_text(child) == match[2].strip } || + node_named_children(node)[1] [match[1], value] end @@ -984,6 +986,7 @@ def literal_value_type(node, constant_types = constant_literal_types) text = node_text(node) case kind when "string", "string_literal", "interpreted_string_literal", "raw_string_literal" then "String" + when "number" then "number" when "integer", "integer_literal" then "Integer" when "float", "float_literal" then "Float" when "true", "false", "true_literal", "false_literal", "boolean" then "T::Boolean" @@ -1058,33 +1061,64 @@ def walk_tree(node, &block) return unless ts_node?(node) yield node - node.children.each { |child| walk_tree(child, &block) } + node_children(node).each { |child| walk_tree(child, &block) } end def ts_node?(node) node && node.respond_to?(:kind) && node.respond_to?(:children) end + def node_cache(node) + @ts_node_cache[node.object_id] ||= {} + end + + def node_children(node) + node_cache(node).fetch(:children) do + node_cache(node)[:children] = Array(node.children) + end + rescue StandardError + [] + end + + def node_named_children(node) + node_cache(node).fetch(:named_children) do + node_cache(node)[:named_children] = Array(node.named_children) + end + rescue StandardError + [] + end + def named_child(node, name) - node.child_by_field_name(name) + fields = (node_cache(node)[:fields] ||= {}) + fields.fetch(name) do + fields[name] = node.child_by_field_name(name) + end rescue StandardError nil end def node_line(node) - node.start_point.row + 1 + node_cache(node).fetch(:line) do + node_cache(node)[:line] = node.start_point.row + 1 + end rescue StandardError 1 end def node_span(node) - [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] + node_cache(node).fetch(:span) do + node_cache(node)[:span] = [node.start_point.row + 1, node.start_point.column, node.end_point.row + 1, node.end_point.column] + end rescue 
StandardError nil end def node_text(node) - node&.text.to_s.strip + node_cache(node).fetch(:text) do + node_cache(node)[:text] = node&.text.to_s.strip + end + rescue StandardError + "" end def normalize_text(text) diff --git a/gems/nil-kill/lib/nil_kill/hidden_enum_pressure.rb b/gems/nil-kill/lib/nil_kill/hidden_enum_pressure.rb index f6b71b332..920b7969d 100644 --- a/gems/nil-kill/lib/nil_kill/hidden_enum_pressure.rb +++ b/gems/nil-kill/lib/nil_kill/hidden_enum_pressure.rb @@ -105,7 +105,7 @@ def scan_body(node, ctx) def scan_raw_body(raw, syntax_context, ctx) return unless raw - return if nested_scope_raw?(raw) + return if nested_scope_raw?(raw, syntax_context) node = interesting_node(raw, syntax_context) case node @@ -119,15 +119,16 @@ def scan_raw_body(raw, syntax_context, ctx) inspect_state_write(node, ctx) end - raw.named_children.each { |child| scan_raw_body(child, syntax_context, ctx) } + raw_named_children(syntax_context, raw).each { |child| scan_raw_body(child, syntax_context, ctx) } end def interesting_node(raw, syntax_context) case raw.kind when "body_statement" - first = raw.children.first + first = raw_children(syntax_context, raw).first return syntax_context.wrap(raw, force: Syntax::CaseNode) if first&.kind == "case" - return syntax_context.wrap(raw, force: body_statement_assignment_class(raw)) if body_statement_assignment?(raw) + assignment_class = body_statement_assignment_class(raw, syntax_context) + return syntax_context.wrap(raw, force: assignment_class) if assignment_class && body_statement_assignment?(raw, syntax_context) when "case" syntax_context.wrap(raw, force: Syntax::CaseNode) when "call", "binary", "assignment", "operator_assignment", "element_reference" @@ -135,14 +136,15 @@ def interesting_node(raw, syntax_context) end end - def body_statement_assignment?(raw) - !body_statement_assignment_class(raw).nil? && - raw.children.any? { |child| !child.named? && child.text.to_s == "=" } && - !raw.children.any? { |child| !child.named? 
&& %w[== != <= >= ===].include?(child.text.to_s) } + def body_statement_assignment?(raw, syntax_context) + children = raw_children(syntax_context, raw) + !body_statement_assignment_class(raw, syntax_context).nil? && + children.any? { |child| !child.named? && child.text.to_s == "=" } && + !children.any? { |child| !child.named? && %w[== != <= >= ===].include?(child.text.to_s) } end - def body_statement_assignment_class(raw) - case raw.named_children.first&.kind + def body_statement_assignment_class(raw, syntax_context) + case raw_named_children(syntax_context, raw).first&.kind when "identifier" then Syntax::LocalVariableWriteNode when "instance_variable" then Syntax::InstanceVariableWriteNode when "class_variable" then Syntax::ClassVariableWriteNode @@ -150,13 +152,21 @@ def body_statement_assignment_class(raw) end end - def nested_scope_raw?(raw) + def nested_scope_raw?(raw, syntax_context) return true if %w[method singleton_method class module singleton_class lambda].include?(raw.kind) - return true if raw.kind == "body_statement" && %w[def class module].include?(raw.children.first&.kind) + return true if raw.kind == "body_statement" && %w[def class module].include?(raw_children(syntax_context, raw).first&.kind) false end + def raw_children(syntax_context, raw) + syntax_context.children(raw) + end + + def raw_named_children(syntax_context, raw) + syntax_context.named_children(raw) + end + def inspect_case(node, ctx) slot = slot_for(node.predicate, ctx) return unless slot diff --git a/gems/nil-kill/lib/nil_kill/languages.rb b/gems/nil-kill/lib/nil_kill/languages.rb index 81772f14e..a991e3d55 100644 --- a/gems/nil-kill/lib/nil_kill/languages.rb +++ b/gems/nil-kill/lib/nil_kill/languages.rb @@ -5,6 +5,7 @@ require_relative "languages/registry" require_relative "languages/providers/ruby" require_relative "languages/providers/python" +require_relative "languages/providers/lua" require_relative "languages/providers/rust" require_relative "languages/providers/typescript" 
require_relative "languages/providers/zig" diff --git a/gems/nil-kill/lib/nil_kill/languages/providers/lua.rb b/gems/nil-kill/lib/nil_kill/languages/providers/lua.rb new file mode 100644 index 000000000..5a4fdb5d8 --- /dev/null +++ b/gems/nil-kill/lib/nil_kill/languages/providers/lua.rb @@ -0,0 +1,32 @@ +# typed: false +# frozen_string_literal: true + +module NilKill + module Languages + module Providers + class Lua < Provider + def language + "lua" + end + + def display_name + "Lua" + end + + def extensions + [".lua"] + end + + def runtime_tracing? + false + end + + def notes + ["static Tree-sitter evidence is supported; runtime tracing is not implemented for Lua"] + end + end + end + end +end + +NilKill::Languages.register(NilKill::Languages::Providers::Lua.new) diff --git a/gems/nil-kill/spec/multi_language_runtime_spec.rb b/gems/nil-kill/spec/multi_language_runtime_spec.rb index e12035331..fca9464d8 100644 --- a/gems/nil-kill/spec/multi_language_runtime_spec.rb +++ b/gems/nil-kill/spec/multi_language_runtime_spec.rb @@ -3,10 +3,11 @@ require_relative "spec_helper" RSpec.describe "nil-kill multi-language runtime pipeline" do - it "publishes language provider capabilities for Ruby, Python, TypeScript, and Zig" do + it "publishes language provider capabilities for Ruby, Python, TypeScript, Lua, and Zig" do ruby = NilKill::Languages.capability_for("ruby") python = NilKill::Languages.capability_for("python") typescript = NilKill::Languages.capability_for("typescript") + lua = NilKill::Languages.capability_for("lua") zig = NilKill::Languages.capability_for("zig") expect(ruby).to include("runtime_tracing" => true) @@ -18,6 +19,8 @@ expect(python.dig("runtime_capabilities", "line_coverage")).to be(true) expect(typescript).to include("static_analysis" => true, "runtime_tracing" => false, "type_indexing" => false) expect(typescript["annotation_systems"]).to include("typescript") + expect(lua).to include("static_analysis" => true, "runtime_tracing" => false, 
"type_indexing" => false) + expect(lua["notes"].join).to include("runtime tracing is not implemented") expect(zig).to include("static_analysis" => true, "runtime_tracing" => false) expect(zig["notes"].join).to include("runtime tracing is not implemented") end @@ -26,6 +29,7 @@ expect(NilKill::Languages.provider_for_path("src/probe.rb").language).to eq("ruby") expect(NilKill::Languages.provider_for_path("src/probe.py").language).to eq("python") expect(NilKill::Languages.provider_for_path("src/probe.ts").language).to eq("typescript") + expect(NilKill::Languages.provider_for_path("src/probe.lua").language).to eq("lua") expect(NilKill::Languages.provider_for_path("src/probe.zig").language).to eq("zig") expect(NilKill::Languages.provider_for_path("src/probe.txt")).to be_nil end @@ -176,6 +180,78 @@ class Worker { end end + it "uses the Decomplex extension for Lua Tree-sitter static evidence" do + grammar = ENV["DECOMPLEX_TS_LUA_PATH"] + skip "set DECOMPLEX_TS_LUA_PATH to run Lua Tree-sitter static evidence test" unless grammar && File.file?(grammar) + + Dir.mktmpdir("nil-kill-lua-static", NilKill::ROOT) do |dir| + src = File.join(dir, "src") + FileUtils.mkdir_p(src) + File.write(File.join(src, "worker.lua"), <<~LUA) + local Worker = {} + + function Worker.new(items) + local self = { items = items, count = 0 } + return self + end + + function Worker:call(value) + self.items[#self.items + 1] = value + self.client:fetch(value) + return { value = value, ok = true } + end + + Worker.run = function(self, job) + self.status = job + self:call(job) + end + LUA + + evidence = NilKill::StaticEvidence.build([src], root: dir) + + expect(evidence["methods"]).to include(a_hash_including( + "language" => "lua", + "owner" => "Worker", + "name" => "call", + "kind" => "method" + )) + expect(evidence["methods"]).to include(a_hash_including( + "language" => "lua", + "owner" => "Worker", + "name" => "run", + "kind" => "method" + )) + expect(evidence["fields"]).to include(a_hash_including( + 
"language" => "lua", + "owner" => "Worker", + "name" => "items", + "static_origin" => "state_write" + )) + expect(evidence["fields"]).to include(a_hash_including( + "language" => "lua", + "owner" => "Worker", + "name" => "status", + "static_origin" => "state_write" + )) + expect(evidence.dig("facts", "state_protocols", "Worker\u0000client")).to include("fetch") + expect(evidence.dig("facts", "state_param_origins", "Worker\u0000items")).to eq(["value"]) + expect(evidence.dig("facts", "state_param_origins", "Worker\u0000status")).to eq(["job"]) + expect(evidence.dig("facts", "hash_shapes")).to include(a_hash_including( + "keys" => include("items", "count"), + "value_types" => include("number") + )) + expect(evidence.dig("facts", "hash_shapes")).to include(a_hash_including( + "keys" => include("value", "ok"), + "value_types" => include("T::Boolean") + )) + expect(evidence.dig("language_capabilities", "lua")).to include( + "static_analysis" => true, + "runtime_tracing" => false, + "type_indexing" => false + ) + end + end + it "exposes provider capabilities from trace-spec" do spec = NilKill::Commands::TraceSpecCommand.new([]).spec languages = spec.fetch("language_capabilities").to_h { |cap| [cap.fetch("language"), cap] } @@ -183,6 +259,7 @@ class Worker { expect(languages.fetch("python")).to include("runtime_tracing" => true) expect(languages.fetch("typescript")).to include("type_indexing" => false) expect(languages.fetch("typescript")["annotation_systems"]).to include("typescript") + expect(languages.fetch("lua")).to include("runtime_tracing" => false) expect(languages.fetch("zig")).to include("runtime_tracing" => false) end